From fb672f4bec334e05e64b1da87a1f466b8d8aff27 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Wed, 18 Feb 2026 18:36:25 -0700 Subject: [PATCH 01/51] initial reference section creation and configuration --- docusaurus.config.ts | 78 +- reference/todo | 2 + reference_versioned_docs/version-v4/todo | 1 + .../version-v4-sidebars.json | 3 + reference_versions.json | 1 + scripts/analyze-pageview-data.mjs | 279 +++ scripts/harper-docs-analytics.csv | 1635 +++++++++++++++++ sidebars.ts | 43 - sidebarsReference.ts | 16 + src/pages/index.mdx | 55 + versioned_sidebars/version-4.1-sidebars.json | 122 -- versioned_sidebars/version-4.2-sidebars.json | 50 - versioned_sidebars/version-4.3-sidebars.json | 50 - versioned_sidebars/version-4.4-sidebars.json | 39 - versioned_sidebars/version-4.5-sidebars.json | 53 - versioned_sidebars/version-4.6-sidebars.json | 53 - versioned_sidebars/version-4.7-sidebars.json | 59 - versions.json | 1 - 18 files changed, 2035 insertions(+), 505 deletions(-) create mode 100644 reference/todo create mode 100644 reference_versioned_docs/version-v4/todo create mode 100644 reference_versioned_sidebars/version-v4-sidebars.json create mode 100644 reference_versions.json create mode 100644 scripts/analyze-pageview-data.mjs create mode 100644 scripts/harper-docs-analytics.csv delete mode 100644 sidebars.ts create mode 100644 sidebarsReference.ts create mode 100644 src/pages/index.mdx delete mode 100644 versioned_sidebars/version-4.1-sidebars.json delete mode 100644 versioned_sidebars/version-4.2-sidebars.json delete mode 100644 versioned_sidebars/version-4.3-sidebars.json delete mode 100644 versioned_sidebars/version-4.4-sidebars.json delete mode 100644 versioned_sidebars/version-4.5-sidebars.json delete mode 100644 versioned_sidebars/version-4.6-sidebars.json delete mode 100644 versioned_sidebars/version-4.7-sidebars.json delete mode 100644 versions.json diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 01456e6d..74078f22 100644 --- 
a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -22,10 +22,10 @@ const baseUrl = process.env.DOCUSAURUS_BASE_URL || '/'; // Determine route base path for docs // Can be set to '/docs/' if we need docs under a subdirectory // Default is '/' to serve docs at the root -const routeBasePath = process.env.DOCUSAURUS_ROUTE_BASE_PATH || '/docs'; // matching the production URL structure since this will currently affect some relative links in the docs +const routeBasePath = process.env.DOCUSAURUS_ROUTE_BASE_PATH; // matching the production URL structure since this will currently affect some relative links in the docs // URL can also be overridden if needed -const url = process.env.DOCUSAURUS_URL || 'https://docs.harperdb.io'; +const url = process.env.DOCUSAURUS_URL || 'https://docs.harper.fast'; // Always log configuration at build time console.log('Docusaurus URL config:', { url, baseUrl, routeBasePath }); @@ -58,6 +58,13 @@ const config: Config = { onBrokenLinks: 'throw', plugins: [ + [ + '@docusaurus/plugin-content-pages', + { + id: 'default', + path: 'src/pages' + } + ], // Learn documentation [ '@docusaurus/plugin-content-docs', @@ -69,16 +76,16 @@ const config: Config = { editUrl: 'https://github.com/HarperFast/documentation/blob/main/', }, ], - // Main documentation + // Reference documentation [ '@docusaurus/plugin-content-docs', { - id: 'default', - path: './docs', - sidebarPath: './sidebars.ts', - routeBasePath, + id: 'reference', + path: 'reference', + routeBasePath: 'reference', + sidebarPath: './sidebarsReference.ts', editUrl: ({ versionDocsDirPath, docPath }) => { - // For versioned docs: versionDocsDirPath is like 'versioned_docs/version-4.6' + // For versioned docs: versionDocsDirPath is like 'versioned_docs/version-4' // For current docs: versionDocsDirPath is 'docs' if (versionDocsDirPath.startsWith('versioned_docs')) { // Versioned docs are in versioned_docs/version-X.X/ @@ -88,21 +95,11 @@ const config: Config = { return 
`https://github.com/HarperFast/documentation/blob/main/docs/${docPath}`; } }, - lastVersion: '4.7', + lastVersion: 'current', includeCurrentVersion: false, versions: { - '4.7': { - // No banner for 4.7 as it's the latest stable version - banner: 'none', - }, - '4.6': { - // No banner for 4.6 as it's still actively maintained - banner: 'none', - }, - '4.5': { - // No banner for 4.5 as it's still actively maintained - banner: 'none', - }, + 'current': { label: 'v5', path: 'v5' }, + 'v4': { label: 'v4', path: 'v4', banner: 'none' } }, // Converts npm commands in markdown code blocks to show npm/yarn/pnpm tabs remarkPlugins: [[require('@docusaurus/remark-plugin-npm2yarn'), { sync: true }]], @@ -218,14 +215,14 @@ const config: Config = { }, ], - // Redirects - [ - '@docusaurus/plugin-client-redirects', - { - redirects: generateRedirects(routeBasePath), - createRedirects: (existingPath: string) => createRedirectsBase(existingPath, routeBasePath), - }, - ], + // // Redirects + // [ + // '@docusaurus/plugin-client-redirects', + // { + // redirects: generateRedirects(routeBasePath), + // createRedirects: (existingPath: string) => createRedirectsBase(existingPath, routeBasePath), + // }, + // ], // Sitemap [ @@ -285,6 +282,7 @@ const config: Config = { highlightSearchTermsOnTargetPage: true, searchResultLimits: 8, searchBarPosition: 'right', + docsPluginIdForPreferredVersion: 'reference' }, ], ]), @@ -315,7 +313,7 @@ const config: Config = { alt: 'Harper Logo', src: 'img/HarperPrimaryBlk.svg', srcDark: 'img/HarperPrimaryWht.svg', - href: 'https://www.harper.fast/', + // href: 'https://www.harper.fast/', }, items: [ { @@ -327,9 +325,10 @@ const config: Config = { }, { type: 'docSidebar', - sidebarId: 'docsSidebar', + sidebarId: 'referenceSidebar', + docsPluginId: 'reference', position: 'left', - label: 'Documentation', + label: 'Reference', }, { // Link directly to v4 (current version) instead of overview page @@ -350,6 +349,7 @@ const config: Config = { type: 
'docsVersionDropdown', position: 'right', dropdownActiveClassDisabled: true, + docsPluginId: 'reference', }, { href: 'https://github.com/HarperFast/documentation', @@ -366,12 +366,20 @@ const config: Config = { items: [ { label: 'Learn', - to: `/learn`, + to: '/learn', + }, + { + label: 'Reference', + to: '/reference', }, { - label: 'Administration', - to: `${routeBasePath}/administration`, + label: 'Release Notes', + to: '/release-notes/v4-tucker' }, + { + label: 'Fabric', + to: '/fabric' + } ], }, { diff --git a/reference/todo b/reference/todo new file mode 100644 index 00000000..142fa0a3 --- /dev/null +++ b/reference/todo @@ -0,0 +1,2 @@ +future v5 docs +replace with final product of /reference_versioned_docs/version-v4/ \ No newline at end of file diff --git a/reference_versioned_docs/version-v4/todo b/reference_versioned_docs/version-v4/todo new file mode 100644 index 00000000..97322323 --- /dev/null +++ b/reference_versioned_docs/version-v4/todo @@ -0,0 +1 @@ +replace with new reference docs content \ No newline at end of file diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json new file mode 100644 index 00000000..a352dd07 --- /dev/null +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -0,0 +1,3 @@ +{ + "docsSidebar": [] +} diff --git a/reference_versions.json b/reference_versions.json new file mode 100644 index 00000000..e3e45025 --- /dev/null +++ b/reference_versions.json @@ -0,0 +1 @@ +["current", "v4"] diff --git a/scripts/analyze-pageview-data.mjs b/scripts/analyze-pageview-data.mjs new file mode 100644 index 00000000..a529cfd6 --- /dev/null +++ b/scripts/analyze-pageview-data.mjs @@ -0,0 +1,279 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const csvPath = join(import.meta.dirname, 'harper-docs-analytics.csv'); + +const dataRaw = readFileSync(csvPath, 'utf8'); + +const data = dataRaw.split('\r\n').slice(9); // remove first 9 lines + +// 
Safe to naively parse CSV since: +// - there are no `,` characters within the header text +data.shift(); // Remove header row +// - nor are there `,` characters within any of the data values +const records = data.map(row => row.split(',')); + +// Parse records into objects with typed values +const pages = records + .filter(row => row.length >= 2 && row[0]) // Filter out empty rows + .map(row => ({ + path: row[0], + views: parseInt(row[1]) || 0, + activeUsers: parseInt(row[2]) || 0, + })); + +// Analysis Functions + +function getTotalViews() { + return pages.reduce((sum, page) => sum + page.views, 0); +} + +function getPathCount() { + return pages.length; +} + +function getViewsByRootPath() { + const rootPaths = {}; + pages.forEach(page => { + const parts = page.path.split('/').filter(Boolean); + const root = parts[0] || 'root'; + if (!rootPaths[root]) { + rootPaths[root] = { views: 0, paths: 0 }; + } + rootPaths[root].views += page.views; + rootPaths[root].paths += 1; + }); + return rootPaths; +} + +function getTopPagesByViews(n) { + return [...pages].sort((a, b) => b.views - a.views).slice(0, n); +} + +function getCumulativeViewPercentages() { + const sorted = [...pages].sort((a, b) => b.views - a.views); + const totalViews = getTotalViews(); + const percentages = []; + let cumulative = 0; + + sorted.forEach((page, index) => { + cumulative += page.views; + const percentage = (cumulative / totalViews) * 100; + percentages.push({ + rank: index + 1, + path: page.path, + views: page.views, + cumulativeViews: cumulative, + cumulativePercentage: percentage, + }); + }); + + return percentages; +} + +function findCoverageThresholds(percentages) { + const thresholds = [50, 75, 80, 90, 95, 99]; + const results = []; + + thresholds.forEach(threshold => { + const index = percentages.findIndex(p => p.cumulativePercentage >= threshold); + if (index !== -1) { + results.push({ + percentage: threshold, + pathCount: index + 1, + totalPaths: pages.length, + pathPercentage: ((index + 
1) / pages.length * 100).toFixed(2), + }); + } + }); + + return results; +} + +// Output Functions + +function printTotalStats() { + console.log('Total Statistics'); + console.log('Overview of the entire dataset'); + console.log('Results:'); + console.log(` Total Paths: ${getPathCount().toLocaleString()}`); + console.log(` Total Views: ${getTotalViews().toLocaleString()}`); + console.log(` Average Views per Path: ${(getTotalViews() / getPathCount()).toFixed(2)}`); + console.log(); +} + +function printTopPages(n = 20) { + const top = getTopPagesByViews(n); + const totalViews = getTotalViews(); + + console.log(`Top ${n} Pages by Views`); + console.log(`The most viewed pages and their contribution to total site views`); + console.log('Results:'); + top.forEach((page, index) => { + const percentage = (page.views / totalViews * 100).toFixed(2); + console.log(` ${(index + 1).toString().padStart(2)}. ${page.path}`); + console.log(` ${page.views.toLocaleString()} views (${percentage}%)`); + }); + console.log(); +} + +function printRootPathAnalysis() { + const rootPaths = getViewsByRootPath(); + const totalViews = getTotalViews(); + const mainPaths = ['docs', 'release-notes', 'fabric', 'learn']; + + console.log('Views by Root Path'); + console.log('Distribution of views across top-level paths'); + console.log('Results:'); + + // Display main paths + mainPaths.forEach(root => { + const stats = rootPaths[root]; + if (stats) { + const percentage = (stats.views / totalViews * 100).toFixed(2); + console.log(` /${root}`); + console.log(` Views: ${stats.views.toLocaleString()} (${percentage}%)`); + console.log(` Paths: ${stats.paths.toLocaleString()}`); + console.log(` Avg Views/Path: ${(stats.views / stats.paths).toFixed(2)}`); + } + }); + + // List other root paths + const otherRoots = Object.keys(rootPaths) + .filter(root => !mainPaths.includes(root)) + .sort((a, b) => rootPaths[b].views - rootPaths[a].views); + + if (otherRoots.length > 0) { + console.log(' Other root 
paths:'); + otherRoots.forEach(root => { + const stats = rootPaths[root]; + const percentage = (stats.views / totalViews * 100).toFixed(2); + console.log(` /${root}: ${stats.views.toLocaleString()} views (${percentage}%), ${stats.paths} paths`); + }); + } + + console.log(); +} + +function printCoverageThresholds() { + const percentages = getCumulativeViewPercentages(); + const thresholds = findCoverageThresholds(percentages); + + console.log('Coverage Analysis'); + console.log('How many paths account for X% of total views'); + console.log('Results:'); + thresholds.forEach(threshold => { + console.log(` ${threshold.percentage}% of views: ${threshold.pathCount} paths (${threshold.pathPercentage}% of all paths)`); + }); + console.log(); +} + +function printViewCountDistribution() { + const totalViews = getTotalViews(); + const totalPaths = pages.length; + + // Create ranges for high traffic + const highTrafficRanges = []; + for (let i = 100; i < 10000; i += 50) { + highTrafficRanges.push({ min: i, max: i + 49 }); + } + + // Calculate stats for each category + const highTrafficPaths = pages.filter(p => p.views >= 100); + const mediumTrafficPaths = pages.filter(p => p.views >= 10 && p.views < 100); + const lowTrafficPaths = pages.filter(p => p.views >= 1 && p.views < 10); + const zeroTrafficPaths = pages.filter(p => p.views === 0); + + const highTrafficViews = highTrafficPaths.reduce((sum, p) => sum + p.views, 0); + const mediumTrafficViews = mediumTrafficPaths.reduce((sum, p) => sum + p.views, 0); + const lowTrafficViews = lowTrafficPaths.reduce((sum, p) => sum + p.views, 0); + + console.log('View Count Distribution'); + console.log('Number of paths grouped by their view count'); + console.log('Results:'); + + // High traffic breakdown + console.log(' High traffic (100+ views):'); + console.log(` Total Paths: ${highTrafficPaths.length} (${(highTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(` Total Views: 
${highTrafficViews.toLocaleString()} (${(highTrafficViews / totalViews * 100).toFixed(2)}% of views)`); + console.log(` Breakdown by range:`); + + highTrafficRanges.forEach(range => { + const pathsInRange = highTrafficPaths.filter(p => p.views >= range.min && p.views <= range.max); + if (pathsInRange.length > 0) { + const viewsInRange = pathsInRange.reduce((sum, p) => sum + p.views, 0); + console.log(` ${range.min}-${range.max} views: ${pathsInRange.length} paths (${viewsInRange.toLocaleString()} views)`); + } + }); + + // Medium traffic + console.log(' Medium traffic (10-99 views):'); + console.log(` Total Paths: ${mediumTrafficPaths.length} (${(mediumTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(` Total Views: ${mediumTrafficViews.toLocaleString()} (${(mediumTrafficViews / totalViews * 100).toFixed(2)}% of views)`); + + // Low traffic + console.log(' Low traffic (1-9 views):'); + console.log(` Total Paths: ${lowTrafficPaths.length} (${(lowTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(` Total Views: ${lowTrafficViews.toLocaleString()} (${(lowTrafficViews / totalViews * 100).toFixed(2)}% of views)`); + + // Zero views + console.log(' Zero views:'); + console.log(` Total Paths: ${zeroTrafficPaths.length} (${(zeroTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(); +} + +function printLongTailAnalysis() { + const sorted = [...pages].sort((a, b) => b.views - a.views); + const totalViews = getTotalViews(); + const totalPaths = pages.length; + + console.log('Long Tail Analysis'); + console.log('Understanding the distribution of low-traffic pages'); + console.log('Results:'); + + const singleDigitViews = sorted.filter(p => p.views < 10 && p.views > 0); + const singleDigitViewsTotal = singleDigitViews.reduce((sum, p) => sum + p.views, 0); + const singleDigitPercentage = (singleDigitViewsTotal / totalViews * 100).toFixed(2); + + console.log(` Paths with 1-9 views:`); + 
console.log(` Count: ${singleDigitViews.length} (${(singleDigitViews.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(` Total Views: ${singleDigitViewsTotal.toLocaleString()} (${singleDigitPercentage}% of total views)`); + + const zeroViews = sorted.filter(p => p.views === 0); + console.log(` Paths with 0 views:`); + console.log(` Count: ${zeroViews.length} (${(zeroViews.length / totalPaths * 100).toFixed(2)}% of paths)`); + console.log(); +} + +function printRedirectStrategyData() { + const percentages = getCumulativeViewPercentages(); + const totalPaths = pages.length; + + console.log('Redirect Strategy Quick Reference'); + console.log('Key data points for redirect planning'); + console.log('Results:'); + + [10, 25, 50, 100, 200].forEach(n => { + if (n <= totalPaths) { + const data = percentages[n - 1]; + const percentage = data.cumulativePercentage.toFixed(2); + console.log(` Top ${n} paths: ${percentage}% of views`); + } + }); + console.log(); +} + +// Run all analyses +console.log('Harper Docs Analytics Report'); +console.log('Data Period: October 21, 2025 - February 4, 2026'); +console.log(); +console.log('═'.repeat(80)); +console.log(); + +printTotalStats(); +printCoverageThresholds(); +printRedirectStrategyData(); +printTopPages(25); +printRootPathAnalysis(); +printViewCountDistribution(); +printLongTailAnalysis(); \ No newline at end of file diff --git a/scripts/harper-docs-analytics.csv b/scripts/harper-docs-analytics.csv new file mode 100644 index 00000000..3c4d9c37 --- /dev/null +++ b/scripts/harper-docs-analytics.csv @@ -0,0 +1,1635 @@ +# ---------------------------------------- +# Pages and screens: Page path +# Account: Harper +# Property: harper.fast - GA4 +# ---------------------------------------- +# +# All Users +# Start date: 20251021 +# End date: 20260204 +Page path,Views,Active users,Views per active user,Average engagement time per active user,Event count,Key events,Total revenue 
+/docs,2854,1177,2.4248088360237894,10.819881053525913,7073,31,0 +/docs/developers/operations-api,1028,236,4.3559322033898304,19.588983050847457,1475,5,0 +/docs/category/developers,1027,181,5.6740331491712706,40.96132596685083,1148,16,0 +/docs/developers/applications,727,238,3.0546218487394956,35.668067226890756,1053,2,0 +/docs/reference/resources,667,90,7.411111111111111,162.27777777777777,975,21,0 +/docs/deployments/configuration,608,128,4.75,53.9453125,970,14,0 +/docs/getting-started/quickstart,587,159,3.691823899371069,45.37106918238994,955,5,0 +/docs/developers/rest,547,127,4.307086614173229,47.89763779527559,824,13,0 +/docs/getting-started/installation,530,201,2.63681592039801,62.1044776119403,976,3,0 +/docs/deployments/harper-cli,467,121,3.8595041322314048,51.85123966942149,882,15,0 +/release-notes/v4-tucker,464,95,4.88421052631579,24.54736842105263,710,4,0 +/docs/reference,459,101,4.544554455445544,29.168316831683168,572,10,0 +/docs/developers/applications/defining-schemas,455,150,3.033333333333333,46.67333333333333,804,16,0 +/docs/developers/operations-api/nosql-operations,435,61,7.131147540983607,58.967213114754095,608,7,0 +/docs/developers/applications/caching,410,126,3.253968253968254,56.476190476190474,671,14,0 +/docs/developers/real-time,407,132,3.0833333333333335,36.53030303030303,703,19,0 +/docs/developers/operations-api/databases-and-tables,385,71,5.422535211267606,34.15492957746479,521,9,0 +/docs/developers/operations-api/components,356,59,6.033898305084746,44.52542372881356,511,23,0 +/docs/deployments/install-harper,343,173,1.9826589595375723,26.271676300578033,814,5,0 +/learn,335,146,2.2945205479452055,15.945205479452055,679,1,0 +/docs/developers/replication,328,90,3.6444444444444444,67.54444444444445,473,9,0 +/docs/foundations/harper-architecture,303,98,3.0918367346938775,48.36734693877551,398,2,0 +/fabric,296,82,3.6097560975609757,29.634146341463413,469,3,0 
+/docs/developers/operations-api/clustering,295,56,5.267857142857143,47.07142857142857,434,9,0 +/docs/reference/globals,277,66,4.196969696969697,90.72727272727273,470,18,0 +/learn/getting-started/install-and-connect-harper,233,103,2.262135922330097,41.43689320388349,467,0,0 +/docs/foundations/core-concepts,229,72,3.1805555555555554,28.694444444444443,276,4,0 +/docs/developers/applications/data-loader,218,71,3.0704225352112675,60.774647887323944,306,11,0 +/docs/developers/operations-api/system-operations,213,52,4.096153846153846,37.05769230769231,316,7,0 +/docs/reference/components/built-in-extensions,204,60,3.4,23.433333333333334,362,6,0 +/docs/developers/operations-api/configuration,203,53,3.830188679245283,20.641509433962263,275,5,0 +/docs/developers/applications/web-applications,199,72,2.763888888888889,38.611111111111114,293,4,0 +/docs/developers/operations-api/users-and-roles,195,51,3.823529411764706,22.137254901960784,317,13,0 +/docs/developers/security,183,61,3,40.01639344262295,233,0,0 +/docs/reference/resources/instance-binding,181,39,4.641025641025641,47.02564102564103,272,12,0 +/fabric/cluster-creation-management,180,42,4.285714285714286,15.857142857142858,286,1,0 +/learn/getting-started/create-your-first-application,174,57,3.0526315789473686,68.08771929824562,260,0,0 +/docs/reference/components,159,42,3.7857142857142856,26.547619047619047,230,2,0 +/docs/developers/operations-api/advanced-json-sql-examples,158,59,2.6779661016949152,43.08474576271186,226,1,0 +/docs/developers/operations-api/bulk-operations,158,43,3.6744186046511627,40.30232558139535,203,3,0 +/release-notes,156,44,3.5454545454545454,13.159090909090908,298,2,0 +/docs/administration,153,63,2.4285714285714284,18.03174603174603,233,3,0 +/docs/developers/applications/debugging,150,51,2.9411764705882355,48.94117647058823,187,0,0 +/docs/reference/components/plugins,150,40,3.75,59.75,268,11,0 +/docs/deployments/install-harper/linux,149,71,2.0985915492957745,20.52112676056338,296,2,0 
+/docs/deployments/harper-cloud,147,55,2.672727272727273,17.12727272727273,225,1,0 +/docs/developers/operations-api/analytics,145,48,3.0208333333333335,17.333333333333332,203,4,0 +/docs/developers/operations-api/quickstart-examples,145,47,3.0851063829787235,35.744680851063826,205,1,0 +/docs/,144,19,7.578947368421052,8.736842105263158,342,4,0 +/docs/developers/applications/define-routes,144,59,2.440677966101695,42.94915254237288,221,1,0 +/docs/foundations/use-cases,141,55,2.5636363636363635,29.381818181818183,180,4,0 +/release-notes/v4-tucker/4.5.35,140,15,9.333333333333334,33.53333333333333,160,0,0 +/docs/reference/analytics,135,57,2.3684210526315788,14.982456140350877,241,7,0 +/docs/developers/replication/sharding,133,47,2.8297872340425534,45.06382978723404,203,5,0 +/docs/developers/operations-api/logs,132,31,4.258064516129032,78.12903225806451,173,12,0 +/docs/reference/dynamic-schema,132,72,1.8333333333333333,28.625,274,4,0 +/docs/administration/harper-studio,130,57,2.280701754385965,15.192982456140351,203,0,0 +/release-notes/v4-tucker/4.5.32,129,15,8.6,117.93333333333334,147,0,0 +/release-notes/v4-tucker/4.5.33,129,9,14.333333333333334,47.888888888888886,152,0,0 +/release-notes/v4-tucker/4.5.34,125,11,11.363636363636363,93.72727272727273,138,0,0 +/release-notes/v4-tucker/4.5.31,122,12,10.166666666666666,76.33333333333333,138,0,0 +/docs/reference/components/applications,121,47,2.574468085106383,19.76595744680851,220,9,0 +/release-notes/v4-tucker/4.5.36,121,18,6.722222222222222,33.388888888888886,178,0,0 +/docs/developers/applications/defining-roles,119,40,2.975,35.975,146,1,0 +/docs/4.1/~gitbook/pdf,114,118,0.9661016949152542,1.7457627118644068,480,0,0 +/docs/developers/operations-api/certificate-management,114,32,3.5625,48.25,159,7,0 +/release-notes/v4-tucker/4.5.30,114,13,8.76923076923077,95.61538461538461,137,0,0 +/docs/developers/operations-api/custom-functions,113,45,2.511111111111111,12.977777777777778,166,3,0 
+/docs/developers/operations-api/jobs,113,36,3.138888888888889,20.38888888888889,146,1,0 +/docs/4.3/developers/operations-api/components,111,8,13.875,4.375,197,0,0 +/docs/reference/graphql,109,52,2.0961538461538463,43.84615384615385,197,6,0 +/docs/reference/resources/migration,109,35,3.1142857142857143,41.77142857142857,187,7,0 +/docs/reference/data-types,107,42,2.5476190476190474,22.523809523809526,185,4,0 +/docs/reference/architecture,105,42,2.5,25.523809523809526,144,1,0 +/fabric/managing-applications,104,34,3.0588235294117645,15.147058823529411,145,0,0 +/docs/4.6/developers/operations-api/components,103,5,20.6,73,144,25,0 +/docs/reference/components/extensions,102,41,2.4878048780487805,39.26829268292683,191,6,0 +/docs/developers/security/configuration,98,48,2.0416666666666665,9.833333333333334,203,3,0 +/docs/4.6,97,35,2.7714285714285714,13.085714285714285,159,3,0 +/docs/developers/operations-api/sql-operations,96,39,2.4615384615384617,16.307692307692307,131,0,0 +/docs/developers/security/users-and-roles,93,48,1.9375,28.145833333333332,174,2,0 +/docs/administration/logging/standard-logging,91,25,3.64,32.68,131,1,0 +/fabric/database-management,90,28,3.2142857142857144,15.5,109,1,0 +/docs/administration/cloning,87,45,1.9333333333333333,18.91111111111111,162,7,0 +/fabric/api-documentation,87,35,2.4857142857142858,20.37142857142857,140,0,0 +/fabric/organization-management,87,25,3.48,12.56,95,3,0 +/release-notes/v4-tucker/4.6.0,87,23,3.782608695652174,42.69565217391305,147,5,0 +/release-notes/v4-tucker/4.6.14,87,17,5.117647058823529,14.470588235294118,125,0,0 +/docs/deployments/upgrade-hdb-instance,84,27,3.111111111111111,36.96296296296296,122,4,0 +/docs/developers/security/basic-auth,83,35,2.3714285714285714,13.714285714285714,121,2,0 +/fabric/create-organization,83,27,3.074074074074074,6.296296296296297,93,0,0 +/search,82,11,7.454545454545454,23.90909090909091,91,0,0 +/docs/developers/operations-api/clustering-nats,80,32,2.5,51.03125,120,1,0 
+/docs/developers/operations-api/token-authentication,79,26,3.0384615384615383,13.423076923076923,110,0,0 +/docs/reference/transactions,79,33,2.393939393939394,47.42424242424242,158,4,0 +/docs/reference/limits,78,37,2.108108108108108,23.08108108108108,138,0,0 +/release-notes/v4-tucker/4.5.0,78,37,2.108108108108108,17.513513513513512,175,5,0 +/docs/developers/security/jwt-auth,77,36,2.138888888888889,24.25,138,1,0 +/release-notes/v4-tucker/4.7.0,77,16,4.8125,25.1875,89,1,0 +/docs/developers/security/certificate-management,76,37,2.054054054054054,34.2972972972973,150,0,0 +/docs/reference/blob,76,39,1.9487179487179487,17,143,3,0 +/docs/reference/components/configuration,74,38,1.9473684210526316,17.57894736842105,159,4,0 +/docs/administration/logging/audit-logging,72,34,2.1176470588235294,13.941176470588236,137,6,0 +/docs/developers/clustering,72,31,2.3225806451612905,9.064516129032258,82,0,0 +/docs/reference/content-types,70,35,2,17.228571428571428,142,0,0 +/fabric/create-account,70,32,2.1875,13,99,0,0 +/release-notes/v4-tucker/4.5.29,70,10,7,17.5,74,0,0 +/release-notes/v4-tucker/4.6.18,70,13,5.384615384615385,21.923076923076923,95,0,0 +/docs/administration/logging,68,26,2.6153846153846154,16.307692307692307,96,1,0 +/docs/reference/roles,62,23,2.6956521739130435,26.17391304347826,98,4,0 +/docs/4.5,61,23,2.652173913043478,15,118,6,0 +/docs/4.5/developers/operations-api/utilities,61,5,12.2,128.6,72,1,0 +/docs/reference/storage-algorithm,61,29,2.103448275862069,22.517241379310345,123,4,0 +/docs/4.6/deployments/configuration,60,11,5.454545454545454,45.63636363636363,97,0,0 +/docs/developers/operations-api/registration,59,27,2.185185185185185,14.11111111111111,70,0,0 +/docs/administration/compact,56,26,2.1538461538461537,25.192307692307693,110,0,0 +/release-notes/v4-tucker/4.6.15,56,14,4,6.428571428571429,86,2,0 +/docs/reference/resources/query-optimization,55,24,2.2916666666666665,29.041666666666668,87,4,0 +/docs/administration/jobs,54,20,2.7,8.6,69,0,0 
+/release-notes/v4-tucker/4.7.3,54,18,3,14.722222222222221,79,2,0 +/docs/developers/sql-guide,53,24,2.2083333333333335,46.458333333333336,72,0,0 +/fabric/logging,52,16,3.25,9.125,68,3,0 +/release-notes/v4-tucker/4.6.13,52,15,3.466666666666667,6.733333333333333,64,0,0 +/release-notes/v4-tucker/4.7.6,49,13,3.769230769230769,10.384615384615385,70,0,0 +/release-notes/v4-tucker/4.5.28,47,13,3.6153846153846154,6.076923076923077,65,0,0 +/docs/developers/security/certificate-verification,46,24,1.9166666666666667,24.541666666666668,89,8,0 +/docs/reference/headers,46,27,1.7037037037037037,19.37037037037037,93,0,0 +/fabric/grafana-integration,46,21,2.1904761904761907,8.904761904761905,68,1,0 +/release-notes/v4-tucker/4.6.16,46,10,4.6,29.2,57,0,0 +/release-notes/v4-tucker/4.6.17,46,14,3.2857142857142856,5.071428571428571,67,0,0 +/docs/administration/harper-studio/create-account,45,27,1.6666666666666667,5.814814814814815,90,0,0 +/docs/administration/logging/transaction-logging,45,22,2.0454545454545454,11.5,81,2,0 +/docs/4.5/developers/operations-api/components,44,3,14.666666666666666,39.666666666666664,53,5,0 +/docs/4.4,43,31,1.3870967741935485,4.387096774193548,124,1,0 +/docs/4.5/deployments/configuration,43,10,4.3,50,67,0,0 +/docs/4.5/developers/operations-api,43,8,5.375,29.5,52,0,0 +/docs/4.6/reference/resources/instance-binding,43,2,21.5,300,58,0,0 +/release-notes/v4-tucker/4.7.2,43,18,2.388888888888889,13.722222222222221,63,0,0 +/learn/developers/coming-soon,42,21,2,23.047619047619047,48,0,0 +/release-notes/v4-tucker/4.4.0,42,26,1.6153846153846154,9.384615384615385,112,3,0 +/docs/4.2,41,37,1.1081081081081081,4.756756756756757,143,0,0 +/docs/administration/harper-studio/login-password-reset,40,25,1.6,24.88,93,2,0 +/release-notes/v4-tucker/4.5.27,40,8,5,29.125,46,0,0 +/docs/4.6/reference/resources,39,6,6.5,34.166666666666664,47,0,0 +/docs/reference/resources/,39,13,3,62.53846153846154,67,4,0 +/release-notes/v4-tucker/4.6.12,39,14,2.7857142857142856,3.9285714285714284,53,0,0 
+/docs/4.6/developers/operations-api/clustering,38,2,19,169,43,0,0 +/docs/4.3,37,26,1.4230769230769231,6.038461538461538,101,0,0 +/docs/4.3/developers/applications,37,31,1.1935483870967742,6.645161290322581,118,0,0 +/docs/4.6/reference,37,13,2.8461538461538463,3.5384615384615383,52,1,0 +/release-notes/v4-tucker/4.3.0,37,28,1.3214285714285714,6.75,120,2,0 +/release-notes/v4-tucker/4.4.5,37,33,1.121212121212121,1.4242424242424243,124,0,0 +/docs/4.5/developers/operations-api/databases-and-tables,36,7,5.142857142857143,62.57142857142857,43,0,0 +/docs/4.6/developers/operations-api/databases-and-tables,36,4,9,164.5,46,1,0 +/release-notes/v2-penny,36,32,1.125,2.375,140,0,0 +/release-notes/v4-tucker/4.0.0,36,33,1.0909090909090908,1.3333333333333333,127,0,0 +/release-notes/v4-tucker/4.7.5,36,10,3.6,24,55,2,0 +/docs/administration/harper-studio/instances,35,25,1.4,7.92,90,0,0 +/release-notes/v2-penny/2.2.2,35,35,1,0.9142857142857143,148,0,0 +/release-notes/v4-tucker/4.7.1,35,12,2.9166666666666665,16.916666666666668,41,1,0 +/docs/4.3/developers/sql-guide/reserved-word,33,33,1,2.4242424242424243,132,0,0 +/docs/administration/harper-studio/manage-databases-browse-data,33,24,1.375,5.583333333333333,79,2,0 +/docs/deployments/harper-cloud/alarms,33,21,1.5714285714285714,30.80952380952381,71,0,0 +/docs/developers/components,33,21,1.5714285714285714,2.5238095238095237,103,0,0 +/release-notes/v4-tucker/4.1.0,33,29,1.1379310344827587,7.068965517241379,115,0,0 +/docs/4.6/developers/operations-api/logs,32,4,8,37.5,37,1,0 +/docs/developers/security/mtls-auth,32,22,1.4545454545454546,7.681818181818182,67,1,0 +/docs/4.3/technical-details/reference,31,23,1.3478260869565217,3.391304347826087,109,0,0 +/docs/4.3/technical-details/reference/architecture,31,23,1.3478260869565217,2.608695652173913,104,0,0 +/docs/reference/clustering,31,18,1.7222222222222223,72.83333333333333,59,0,0 +/release-notes/v4-tucker/4.3.10,31,29,1.0689655172413792,0.8620689655172413,117,0,0 
+/docs/4.3/technical-details/reference/analytics,30,23,1.3043478260869565,2.0434782608695654,98,0,0 +/release-notes/v4-tucker/4.5.1,30,26,1.1538461538461537,3.1153846153846154,97,0,0 +/docs/administration/harper-studio/manage-applications,29,22,1.3181818181818181,10,82,0,0 +/docs/deployments/harper-cloud/verizon-5g-wavelength-instances,29,24,1.2083333333333333,3.4583333333333335,87,1,0 +/release-notes/v4-tucker/4.2.8,29,26,1.1153846153846154,2.5,104,0,0 +/release-notes/v4-tucker/4.5.26,29,10,2.9,33.4,38,0,0 +/release-notes/v4-tucker/4.7.12,28,11,2.5454545454545454,5.2727272727272725,37,0,0 +/docs/4.6/developers/applications,27,6,4.5,12.833333333333334,35,1,0 +/docs/administration/administration,27,21,1.2857142857142858,1.7142857142857142,89,0,0 +/docs/administration/harper-studio/enable-mixed-content,27,18,1.5,2.6666666666666665,59,0,0 +/release-notes/v1-alby/1.1.0,27,27,1,1.7407407407407407,115,0,0 +/release-notes/v3-monkey,27,22,1.2272727272727273,1.3181818181818181,89,0,0 +/release-notes/v4-tucker/4.3.8,27,26,1.0384615384615385,1.8076923076923077,104,0,0 +/release-notes/v4-tucker/4.4.3,27,26,1.0384615384615385,2.4615384615384617,103,0,0 +/release-notes/v4-tucker/4.5.19,27,15,1.8,4.333333333333333,62,0,0 +/docs/4.2/developers/real-time,26,23,1.1304347826086956,2.4782608695652173,81,0,0 +/docs/administration/harper-studio/instance-metrics,26,18,1.4444444444444444,3.2777777777777777,65,2,0 +/docs/developers/components/built-in,26,19,1.368421052631579,3.1578947368421053,98,0,0 +/docs/reference/sql-guide,26,16,1.625,10.5625,50,0,0 +/release-notes/v2-penny/2.3.1,26,26,1,1.0384615384615385,107,0,0 +/release-notes/v4-tucker/4.3.26,26,22,1.1818181818181819,1.0909090909090908,87,0,0 +/release-notes/v4-tucker/4.5.24,26,9,2.888888888888889,1.4444444444444444,40,0,0 +/release-notes/v4-tucker/4.7.14,26,11,2.3636363636363638,11.545454545454545,35,0,0 +/docs/4.6/developers/operations-api/nosql-operations,25,2,12.5,218,30,0,0 
+/docs/developers/components/reference,25,19,1.3157894736842106,3.789473684210526,84,0,0 +/docs/reference/clustering/enabling-clustering,25,11,2.272727272727273,8.181818181818182,52,1,0 +/release-notes/v4-tucker/4.2.0,25,22,1.1363636363636365,8.5,95,1,0 +/release-notes/v4-tucker/4.4.24,25,13,1.9230769230769231,11.461538461538462,48,0,0 +/release-notes/v4-tucker/4.7.13,25,10,2.5,44.9,31,0,0 +/release-notes/v4-tucker/4.7.15,25,11,2.272727272727273,7.636363636363637,38,0,0 +/release-notes/v4-tucker/tucker,25,21,1.1904761904761905,3.2857142857142856,78,0,0 +/docs/4.1,24,24,1,6,84,0,0 +/docs/4.4/developers/applications,24,8,3,17.5,48,0,0 +/docs/4.5/developers/operations-api/logs,24,5,4.8,91.8,31,1,0 +/docs/4.6/administration/harper-studio,24,18,1.3333333333333333,3.2777777777777777,64,0,0 +/release-notes/v4-tucker/4.6.2,24,20,1.2,2.4,75,0,0 +/docs/4.2/developers/operations-api/clustering,23,23,1,1.9565217391304348,88,0,0 +/docs/4.6/developers/applications/defining-schemas,23,9,2.5555555555555554,43.44444444444444,42,1,0 +/docs/reference/sql-guide/json-search,23,12,1.9166666666666667,39.666666666666664,40,1,0 +/release-notes/v1-alby/1.3.0,23,23,1,0.9130434782608695,101,0,0 +/release-notes/v3-monkey/3.0.0,23,24,0.9583333333333334,0.5833333333333334,97,0,0 +/release-notes/v4-tucker/4.1.1,23,23,1,0.6521739130434783,97,0,0 +/release-notes/v4-tucker/4.3.27,23,21,1.0952380952380953,2.7142857142857144,88,0,0 +/release-notes/v4-tucker/4.5.20,23,10,2.3,4.8,39,0,0 +/release-notes/v4-tucker/4.6.1,23,18,1.2777777777777777,11.777777777777779,72,0,0 +/release-notes/v4-tucker/4.6.22,23,9,2.5555555555555554,32.333333333333336,33,0,0 +/release-notes/v4-tucker/4.7.11,23,11,2.090909090909091,4.636363636363637,35,0,0 +/docs/4.2/getting-started,22,16,1.375,3.4375,66,0,0 +/docs/4.3/developers/applications/defining-schemas,22,20,1.1,7.1,84,0,0 +/docs/4.3/developers/operations-api/bulk-operations,22,22,1,3.5454545454545454,87,0,0 
+/docs/4.5/developers/applications/caching,22,6,3.6666666666666665,6.166666666666667,33,0,0 +/docs/administration/harper-studio/instance-configuration,22,13,1.6923076923076923,3.769230769230769,33,1,0 +/docs/administration/harper-studio/manage-instance-users,22,17,1.2941176470588236,8.352941176470589,62,0,0 +/docs/technical-details/reference/resource,22,17,1.2941176470588236,3.235294117647059,74,0,0 +/release-notes/v1-alby/1.3.1,22,22,1,0.6818181818181818,93,0,0 +/release-notes/v4-tucker/4.2.7,22,18,1.2222222222222223,1.6666666666666667,73,2,0 +/release-notes/v4-tucker/4.3.32,22,21,1.0476190476190477,1.380952380952381,77,0,0 +/release-notes/v4-tucker/4.5.22,22,10,2.2,2.2,35,0,0 +/release-notes/v4-tucker/4.5.6,22,15,1.4666666666666666,0.9333333333333333,57,0,0 +/docs/4.3/deployments/harperdb-cloud/iops-impact,21,21,1,4.619047619047619,86,0,0 +/docs/4.3/developers/operations-api/utilities,21,14,1.5,10.357142857142858,58,0,0 +/docs/4.6/developers/operations-api/configuration,21,5,4.2,36.4,29,1,0 +/docs/4.6/developers/operations-api/system-operations,21,3,7,38,29,0,0 +/docs/reference/sql-guide/sql-geospatial-functions,21,21,1,2.0476190476190474,74,0,0 +/release-notes/v1-alby,21,17,1.2352941176470589,0.47058823529411764,67,0,0 +/release-notes/v2-penny/2.1.1,21,20,1.05,2.1,85,0,0 +/release-notes/v3-monkey/3.1.1,21,21,1,0.8571428571428571,87,0,0 +/release-notes/v3-monkey/3.3.0,21,17,1.2352941176470589,6.0588235294117645,68,1,0 +/release-notes/v4-tucker/4.3.28,21,16,1.3125,3.1875,56,1,0 +/release-notes/v4-tucker/4.3.31,21,19,1.105263157894737,1,74,0,0 +/release-notes/v4-tucker/4.4.13,21,13,1.6153846153846154,1.3076923076923077,49,0,0 +/release-notes/v4-tucker/4.4.18,21,16,1.3125,3.4375,59,0,0 +/release-notes/v4-tucker/4.5.13,21,11,1.9090909090909092,1,40,0,0 +/release-notes/v4-tucker/4.5.16,21,14,1.5,2.5,54,0,0 +/release-notes/v4-tucker/4.5.21,21,9,2.3333333333333335,1.5555555555555556,30,1,0 
+/release-notes/v4-tucker/4.6.11,21,9,2.3333333333333335,2.7777777777777777,32,0,0 +/docs/4.1/install-harperdb,20,12,1.6666666666666667,6.583333333333333,49,0,0 +/docs/4.2/developers/operations-api/databases-and-tables,20,20,1,3.9,83,0,0 +/docs/4.6/developers/rest,20,4,5,53.25,32,0,0 +/docs/administration/harper-studio/query-instance-data,20,11,1.8181818181818181,16.545454545454547,32,2,0 +/docs/deployments/harper-cloud/iops-impact,20,15,1.3333333333333333,16.866666666666667,42,2,0 +/docs/reference/clustering/establishing-routes,20,6,3.3333333333333335,36.666666666666664,26,0,0 +/release-notes/v2-penny/2.3.0,20,22,0.9090909090909091,1.2272727272727273,82,0,0 +/release-notes/v4-tucker/4.3.21,20,18,1.1111111111111112,1.5,67,0,0 +/release-notes/v4-tucker/4.4.17,20,16,1.25,7.8125,55,0,0 +/release-notes/v4-tucker/4.4.6,20,15,1.3333333333333333,2.3333333333333335,59,0,0 +/release-notes/v4-tucker/4.5.9,20,17,1.1764705882352942,1.9411764705882353,62,0,0 +/docs/4.2/developers/rest,19,16,1.1875,16.5,58,0,0 +/docs/4.2/developers/security/users-and-roles,19,20,0.95,2,68,0,0 +/docs/4.3/deployments/harperdb-cli,19,19,1,2.0526315789473686,74,0,0 +/docs/4.3/deployments/install-harperdb,19,16,1.1875,3,62,0,0 +/docs/4.3/getting-started,19,19,1,3.0526315789473686,71,0,0 +/docs/4.6/developers/applications/caching,19,3,6.333333333333333,64,28,0,0 +/docs/4.6/developers/operations-api,19,6,3.1666666666666665,10.5,26,0,0 +/docs/4.6/developers/real-time,19,6,3.1666666666666665,114.5,34,0,0 +/docs/4.6/developers/replication,19,7,2.7142857142857144,12.571428571428571,33,0,0 +/docs/4.6/reference/globals,19,7,2.7142857142857144,91.71428571428571,38,0,0 +/docs/developers/clustering/certificate-management,19,11,1.7272727272727273,5.090909090909091,33,0,0 +/docs/reference/clustering/subscription-overview,19,15,1.2666666666666666,7.8,54,1,0 +/release-notes/v1-alby/1.2.0,19,18,1.0555555555555556,1.0555555555555556,77,0,0 +/release-notes/v4-tucker/4.0.5,19,19,1,1.1578947368421053,80,0,0 
+/release-notes/v4-tucker/4.3.19,19,14,1.3571428571428572,2.2857142857142856,55,0,0 +/release-notes/v4-tucker/4.3.34,19,17,1.1176470588235294,3.764705882352941,64,0,0 +/release-notes/v4-tucker/4.5.11,19,12,1.5833333333333333,1.3333333333333333,45,0,0 +/release-notes/v4-tucker/4.5.12,19,10,1.9,1.2,39,0,0 +/release-notes/v4-tucker/4.5.15,19,13,1.4615384615384615,2.923076923076923,42,0,0 +/release-notes/v4-tucker/4.5.23,19,6,3.1666666666666665,2.5,23,0,0 +/release-notes/v4-tucker/4.5.25,19,8,2.375,1.25,28,0,0 +/release-notes/v4-tucker/4.5.3,19,14,1.3571428571428572,2.7857142857142856,54,0,0 +/release-notes/v4-tucker/4.6.21,19,9,2.111111111111111,3.111111111111111,32,0,0 +/docs/4.2/technical-details/reference/architecture,18,11,1.6363636363636365,1.3636363636363635,54,0,0 +/docs/4.3/developers/clustering,18,18,1,2.2777777777777777,70,0,0 +/docs/4.3/technical-details/reference/dynamic-schema,18,18,1,2.2222222222222223,77,0,0 +/docs/4.3/technical-details/reference/resource,18,19,0.9473684210526315,3.1578947368421053,76,0,0 +/docs/4.5/reference/resource,18,7,2.5714285714285716,32.57142857142857,29,0,0 +/docs/4.6/developers/operations-api/analytics,18,9,2,19.444444444444443,41,0,0 +/docs/administration/harper-studio/organizations,18,11,1.6363636363636365,7.363636363636363,40,0,0 +/release-notes/v2-penny/2.2.0,18,18,1,1.3888888888888888,79,0,0 +/release-notes/v3-monkey/3.1.0,18,18,1,1.0555555555555556,77,0,0 +/release-notes/v4-tucker/4.0.1,18,18,1,1.8333333333333333,69,0,0 +/release-notes/v4-tucker/4.3.20,18,9,2,3,35,0,0 +/release-notes/v4-tucker/4.3.29,18,16,1.125,1.5625,65,0,0 +/release-notes/v4-tucker/4.5.17,18,11,1.6363636363636365,0.5454545454545454,37,0,0 +/release-notes/v4-tucker/4.5.7,18,15,1.2,3.3333333333333335,58,0,0 +/release-notes/v4-tucker/4.6.8,18,6,3,9,31,0,0 +/docs/4.1/harperdb-cli,17,12,1.4166666666666667,7.416666666666667,47,0,0 +/docs/4.1/harperdb-studio/organizations,17,17,1,1.2941176470588236,60,0,0 
+/docs/4.1/harperdb-studio/resources,17,13,1.3076923076923077,4.846153846153846,56,1,0 +/docs/4.1/install-harperdb/linux,17,17,1,25.764705882352942,64,0,0 +/docs/4.2/developers/operations-api/users-and-roles,17,17,1,4.352941176470588,70,0,0 +/docs/4.2/technical-details/reference,17,9,1.8888888888888888,0.4444444444444444,46,0,0 +/docs/4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances,17,17,1,1.7058823529411764,66,0,0 +/docs/4.3/technical-details/reference/data-types,17,17,1,2.1176470588235294,71,0,0 +/docs/4.4/developers/operations-api/users-and-roles,17,7,2.4285714285714284,82.14285714285714,34,0,0 +/docs/4.5/administration/logging/transaction-logging,17,9,1.8888888888888888,10.333333333333334,35,0,0 +/docs/4.6/developers/replication/clustering/things-worth-knowing,17,17,1,2.3529411764705883,67,0,0 +/docs/4.6/technical-details/reference/storage-algorithm,17,18,0.9444444444444444,4.333333333333333,76,0,0 +/docs/deployments/harper-cloud/instance-size-hardware-specs,17,11,1.5454545454545454,2.3636363636363638,28,0,0 +/docs/developers/clustering/enabling-clustering,17,11,1.5454545454545454,2.909090909090909,26,0,0 +/docs/v/4.2/administration/cloning,17,18,0.9444444444444444,2.4444444444444446,73,0,0 +/learn/administration/coming-soon,17,11,1.5454545454545454,6.454545454545454,26,0,0 +/release-notes/v3-monkey/3.2.0,17,17,1,0.23529411764705882,70,0,0 +/release-notes/v3-monkey/3.2.1,17,17,1,0.29411764705882354,66,0,0 +/release-notes/v4-tucker/4.0.2,17,17,1,2.3529411764705883,69,0,0 +/release-notes/v4-tucker/4.4.14,17,10,1.7,0.9,37,0,0 +/release-notes/v4-tucker/4.4.8,17,11,1.5454545454545454,1.4545454545454546,39,0,0 +/release-notes/v4-tucker/4.5.37,17,7,2.4285714285714284,3.4285714285714284,20,0,0 +/release-notes/v4-tucker/4.5.38,17,6,2.8333333333333335,11.5,23,0,0 +/robots.txt,17,17,1,10.529411764705882,64,0,0 +/docs/4.1/getting-started,16,13,1.2307692307692308,1,47,0,0 
+/docs/4.2/developers/operations-api/quickstart-examples,16,13,1.2307692307692308,4.538461538461538,51,0,0 +/docs/4.2/technical-details/reference/analytics,16,8,2,0.25,41,0,0 +/docs/4.3/administration/harperdb-studio/instance-configuration,16,15,1.0666666666666667,2.933333333333333,60,0,0 +/docs/4.3/deployments/configuration,16,15,1.0666666666666667,4.066666666666666,56,0,0 +/docs/4.3/developers/operations-api/quickstart-examples,16,15,1.0666666666666667,4.666666666666667,60,0,0 +/docs/4.5/developers/operations-api/clustering,16,6,2.6666666666666665,5,21,1,0 +/docs/administration/harperdb-cli,16,16,1,2.6875,66,0,0 +/docs/administration/upgrade-hdb-instance,16,15,1.0666666666666667,4.866666666666666,69,0,0 +/docs/developers/sql-guide/functions,16,9,1.7777777777777777,20.22222222222222,28,1,0 +/docs/getting-started/harper-concepts,16,16,1,2.5,70,0,0 +/docs/reference/sql-guide/functions,16,9,1.7777777777777777,10.88888888888889,34,2,0 +/docs/v/4.1/reference/limits,16,16,1,2.5625,68,0,0 +/docs/v/4.2/developers/applications,16,17,0.9411764705882353,1.3529411764705883,62,0,0 +/docs/v/4.2/technical-details/reference/architecture,16,16,1,4,67,0,0 +/release-notes/v4-tucker/4.2.5,16,16,1,0.875,61,0,0 +/release-notes/v4-tucker/4.3.18,16,13,1.2307692307692308,1.1538461538461537,44,0,0 +/release-notes/v4-tucker/4.4.1,16,10,1.6,12.1,40,0,0 +/release-notes/v4-tucker/4.4.10,16,14,1.1428571428571428,0.6428571428571429,51,0,0 +/release-notes/v4-tucker/4.4.9,16,14,1.1428571428571428,1.2142857142857142,55,0,0 +/release-notes/v4-tucker/4.5.14,16,8,2,0.125,28,0,0 +/release-notes/v4-tucker/4.6.19,16,8,2,0.75,24,0,0 +/release-notes/v4-tucker/4.6.20,16,7,2.2857142857142856,1,23,0,0 +/release-notes/v4-tucker/4.6.9,16,7,2.2857142857142856,3,27,0,0 +/release-notes/v4-tucker/4.7.10,16,9,1.7777777777777777,8.11111111111111,26,0,0 +/technical-details/reference/resources,16,8,2,0.75,42,0,0 +/docs/4.1/release-notes/2.penny/2.3.0,15,15,1,2.8666666666666667,64,0,0 
+/docs/4.1/support,15,14,1.0714285714285714,26.714285714285715,54,0,0 +/docs/4.2/developers/security/jwt-auth,15,14,1.0714285714285714,1.0714285714285714,55,0,0 +/docs/4.2/developers/sql-guide/json-search,15,15,1,2.6666666666666665,61,0,0 +/docs/4.2/developers/sql-guide/sql-geospatial-functions,15,15,1,3.8666666666666667,65,0,0 +/docs/4.3/administration/harperdb-studio/organizations,15,15,1,1.9333333333333333,65,0,0 +/docs/4.3/developers/operations-api,15,8,1.875,4.25,31,0,0 +/docs/4.3/developers/real-time,15,15,1,1.7333333333333334,53,0,0 +/docs/4.3/technical-details/reference/content-types,15,18,0.8333333333333334,5.444444444444445,76,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.1.1,15,15,1,2.066666666666667,63,0,0 +/docs/4.5/administration/logging/audit-logging,15,5,3,4.4,20,0,0 +/docs/4.6/developers/replication/sharding,15,2,7.5,165,27,1,0 +/docs/4.6/reference/components/extensions,15,3,5,45.666666666666664,28,0,0 +/docs/administration/harper-studio/manage-instance-roles,15,14,1.0714285714285714,2.7142857142857144,51,0,0 +/docs/developers/components/writing-extensions,15,15,1,1.8,65,0,0 +/docs/developers/replication/clustering/enabling-clustering,15,15,1,3.466666666666667,63,0,0 +/docs/developers/sql-guide/features-matrix,15,9,1.6666666666666667,9.333333333333334,20,0,0 +/docs/reference/clustering/managing-subscriptions,15,8,1.875,7.875,28,0,0 +/docs/reference/sql-guide/date-functions,15,10,1.5,6.1,29,0,0 +/docs/v/4.1/getting-started,15,15,1,4.4,66,0,0 +/docs/v/4.1/harperdb-studio/resources,15,14,1.0714285714285714,4.571428571428571,66,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions/geodistance,15,15,1,3.6,66,0,0 +/docs/v/4.1/~gitbook/pdf,15,15,1,2.2666666666666666,58,0,0 +/docs/v/4.2/developers/real-time,15,15,1,3.466666666666667,63,0,0 +/release-notes/v4-tucker/4.3.16,15,12,1.25,3.4166666666666665,47,1,0 +/release-notes/v4-tucker/4.3.17,15,11,1.3636363636363635,2.272727272727273,42,0,0 
+/release-notes/v4-tucker/4.3.36,15,13,1.1538461538461537,1.3846153846153846,50,0,0 +/release-notes/v4-tucker/4.3.37,15,13,1.1538461538461537,0.6923076923076923,46,0,0 +/release-notes/v4-tucker/4.3.9,15,14,1.0714285714285714,2.857142857142857,55,0,0 +/release-notes/v4-tucker/4.4.19,15,12,1.25,1.8333333333333333,45,0,0 +/docs/4.1/harperdb-cloud/verizon-5g-wavelength-instances,14,14,1,3.2857142857142856,61,0,0 +/docs/4.1/reference/dynamic-schema,14,13,1.0769230769230769,4.538461538461538,53,0,0 +/docs/4.2/developers/applications/example-projects,14,11,1.2727272727272727,7.636363636363637,42,0,0 +/docs/4.3/deployments/install-harperdb/linux,14,14,1,1.0714285714285714,56,0,0 +/docs/4.3/developers/clustering/enabling-clustering,14,14,1,13.785714285714286,57,0,0 +/docs/4.3/developers/clustering/establishing-routes,14,14,1,0.7857142857142857,54,0,0 +/docs/4.3/developers/clustering/managing-subscriptions,14,14,1,2.0714285714285716,57,0,0 +/docs/4.3/developers/operations-api/nosql-operations,14,9,1.5555555555555556,2.6666666666666665,43,0,0 +/docs/4.3/developers/sql-guide/sql-geospatial-functions,14,14,1,3,63,0,0 +/docs/4.3/technical-details/reference/headers,14,14,1,3.2857142857142856,61,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/1.3.1,14,15,0.9333333333333333,4.066666666666666,64,0,0 +/docs/4.6/administration/logging,14,4,3.5,13,22,1,0 +/docs/developers/components/managing,14,14,1,3.2857142857142856,60,0,0 +/docs/developers/sql-guide/date-functions,14,8,1.75,6.25,17,0,0 +/docs/reference/clustering/things-worth-knowing,14,11,1.2727272727272727,7,41,0,0 +/docs/v/4.1/harperdb-cloud/iops-impact,14,14,1,3.0714285714285716,60,0,0 +/docs/v/4.2/deployments/harperdb-cloud/instance-size-hardware-specs,14,14,1,1.7142857142857142,59,0,0 +/docs/v/4.2/developers/applications/debugging,14,14,1,0.9285714285714286,56,0,0 +/docs/v/4.2/developers/security/users-and-roles,14,15,0.9333333333333333,1.9333333333333333,57,0,0 
+/release-notes/v3-monkey/3.1.2,14,13,1.0769230769230769,0.6923076923076923,52,1,0 +/release-notes/v4-tucker/4.0.7,14,14,1,0.5,55,0,0 +/release-notes/v4-tucker/4.2.2,14,14,1,1.4285714285714286,63,0,0 +/release-notes/v4-tucker/4.3.25,14,9,1.5555555555555556,5,35,1,0 +/release-notes/v4-tucker/4.3.30,14,12,1.1666666666666667,1.0833333333333333,41,0,0 +/release-notes/v4-tucker/4.3.7,14,13,1.0769230769230769,1.2307692307692308,50,0,0 +/release-notes/v4-tucker/4.5.18,14,7,2,3.5714285714285716,23,0,0 +/release-notes/v4-tucker/4.5.4,14,9,1.5555555555555556,2.111111111111111,32,0,0 +/release-notes/v4-tucker/4.6.10,14,7,2,0.5714285714285714,22,0,0 +/docs/4.1/clustering/subscription-overview,13,13,1,3.6153846153846154,55,0,0 +/docs/4.1/custom-functions/example-projects,13,13,1,1.2307692307692308,51,0,0 +/docs/4.1/sql-guide/select,13,12,1.0833333333333333,3,46,0,0 +/docs/4.2/administration/cloning,13,11,1.1818181818181819,6.909090909090909,42,0,0 +/docs/4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances,13,13,1,2.8461538461538463,51,0,0 +/docs/4.2/developers/applications/define-routes,13,13,1,1.5384615384615385,50,0,0 +/docs/4.2/developers/applications/defining-schemas,13,13,1,0.23076923076923078,45,0,0 +/docs/4.2/developers/clustering/certificate-management,13,13,1,0.6153846153846154,53,0,0 +/docs/4.3/developers/components,13,12,1.0833333333333333,4,50,0,0 +/docs/4.3/developers/operations-api/custom-functions,13,13,1,2.3076923076923075,56,0,0 +/docs/4.3/developers/operations-api/registration,13,13,1,3.6153846153846154,51,0,0 +/docs/4.4/administration,13,10,1.3,3.1,39,0,0 +/docs/4.4/developers/miscellaneous/sdks,13,13,1,3.6923076923076925,55,0,0 +/docs/4.4/technical-details/reference,13,7,1.8571428571428572,0.42857142857142855,35,0,0 +/docs/4.4/technical-details/reference/analytics,13,7,1.8571428571428572,1.5714285714285714,36,0,0 +/docs/4.4/technical-details/reference/architecture,13,7,1.8571428571428572,0.5714285714285714,36,0,0 
+/docs/4.5/developers/applications,13,7,1.8571428571428572,4.571428571428571,19,0,0 +/docs/4.5/developers/components/built-in,13,6,2.1666666666666665,2.5,19,0,0 +/docs/4.5/developers/operations-api/custom-functions,13,5,2.6,1.8,19,0,0 +/docs/4.5/developers/replication/sharding,13,2,6.5,152.5,22,0,0 +/docs/4.6/developers/miscellaneous/query-optimization,13,13,1,2.230769230769231,55,0,0 +/docs/4.6/getting-started/installation,13,4,3.25,26,20,0,0 +/docs/4.6/reference/analytics,13,5,2.6,75.8,22,0,0 +/docs/4.6/technical-details/reference/resources/instance-binding,13,13,1,3.3846153846153846,56,0,0 +/docs/administration/harperdb-studio/manage-applications,13,12,1.0833333333333333,3.1666666666666665,53,0,0 +/docs/developers/,13,7,1.8571428571428572,0.42857142857142855,35,0,0 +/docs/developers/miscellaneous/sdks,13,13,1,2.923076923076923,54,0,0 +/docs/developers/sql-guide/sql-geospatial-functions,13,10,1.3,26.6,41,0,0 +/docs/v/4.1/security/basic-auth,13,13,1,4.076923076923077,56,0,0 +/docs/v/4.1/sql-guide/reserved-word,13,14,0.9285714285714286,3.4285714285714284,57,0,0 +/release-notes/v4-tucker/4.3.22,13,11,1.1818181818181819,0.9090909090909091,42,0,0 +/release-notes/v4-tucker/4.3.24,13,11,1.1818181818181819,1.5454545454545454,42,0,0 +/release-notes/v4-tucker/4.4.11,13,11,1.1818181818181819,0.5454545454545454,37,0,0 +/release-notes/v4-tucker/4.4.22,13,8,1.625,22.75,30,0,0 +/release-notes/v4-tucker/4.5.5,13,8,1.625,0.75,27,0,0 +/release-notes/v4-tucker/4.5.8,13,11,1.1818181818181819,2.6363636363636362,43,0,0 +/release-notes/v4-tucker/4.6.23,13,6,2.1666666666666665,9.333333333333334,21,0,0 +/docs/4.1/custom-functions/create-project,12,13,0.9230769230769231,2.230769230769231,51,0,0 +/docs/4.1/harperdb-studio/instance-metrics,12,11,1.0909090909090908,3.3636363636363638,41,0,0 +/docs/4.1/reference/headers,12,11,1.0909090909090908,1.3636363636363635,43,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions,12,12,1,0.5833333333333334,46,0,0 
+/docs/4.1/sql-guide/sql-geospatial-functions/geolength,12,12,1,3.75,50,0,0 +/docs/4.2/developers/components/writing-extensions,12,7,1.7142857142857142,4,34,0,0 +/docs/4.2/developers/operations-api/advanced-json-sql-examples,12,12,1,3.6666666666666665,58,0,0 +/docs/4.3/administration/logging/transaction-logging,12,12,1,1.1666666666666667,49,0,0 +/docs/4.3/deployments/upgrade-hdb-instance,12,12,1,2.3333333333333335,49,0,0 +/docs/4.3/developers/operations-api/clustering,12,9,1.3333333333333333,2.6666666666666665,36,0,0 +/docs/4.3/developers/operations-api/sql-operations,12,10,1.2,4.4,41,0,0 +/docs/4.3/developers/security,12,11,1.0909090909090908,4.636363636363637,44,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.0.0,12,12,1,1.5,48,0,0 +/docs/4.4/administration/harper-studio/manage-replication,12,12,1,3.5833333333333335,49,0,0 +/docs/4.5/developers/components,12,6,2,5,18,0,0 +/docs/4.5/developers/components/managing,12,5,2.4,3,19,0,0 +/docs/4.5/reference,12,7,1.7142857142857142,14.857142857142858,18,0,0 +/docs/4.5/reference/globals,12,6,2,5.333333333333333,21,0,0 +/docs/4.5/technical-details/reference,12,6,2,0.5,31,0,0 +/docs/4.5/technical-details/reference/analytics,12,6,2,1.3333333333333333,34,0,0 +/docs/4.5/technical-details/reference/architecture,12,6,2,1.3333333333333333,34,0,0 +/docs/4.6/administration/logging/transaction-logging,12,7,1.7142857142857142,2.7142857142857144,30,0,0 +/docs/4.6/deployments/harper-cloud/alarms,12,12,1,2.5833333333333335,52,0,0 +/docs/4.6/developers/operations-api/users-and-roles,12,9,1.3333333333333333,1.6666666666666667,36,0,0 +/docs/4.6/technical-details,12,13,0.9230769230769231,4.769230769230769,50,0,0 +/docs/developers/clustering/creating-a-cluster-user,12,6,2,7.666666666666667,15,0,0 +/docs/developers/clustering/things-worth-knowing,12,11,1.0909090909090908,2.8181818181818183,48,0,0 +/docs/reference/clustering/creating-a-cluster-user,12,8,1.5,10.875,27,0,0 
+/docs/v/4.2/deployments/upgrade-hdb-instance,12,12,1,2.3333333333333335,47,0,0 +/release-notes/v4-tucker/4.2.3,12,12,1,0.16666666666666666,49,0,0 +/release-notes/v4-tucker/4.3.1,12,11,1.0909090909090908,2.5454545454545454,43,0,0 +/release-notes/v4-tucker/4.3.23,12,10,1.2,0.8,35,0,0 +/release-notes/v4-tucker/4.3.35,12,10,1.2,1.5,36,0,0 +/release-notes/v4-tucker/4.4.7,12,9,1.3333333333333333,0.4444444444444444,28,0,0 +/release-notes/v4-tucker/4.5.2,12,10,1.2,1.1,32,0,0 +/release-notes/v4-tucker/4.6.3,12,10,1.2,1,33,0,0 +/release-notes/v4-tucker/4.7.9,12,7,1.7142857142857142,2,18,0,0 +/docs/4.1/custom-functions/define-helpers,11,11,1,2.909090909090909,47,0,0 +/docs/4.1/harperdb-studio,11,11,1,1.2727272727272727,42,0,0 +/docs/4.1/harperdb-studio/instances,11,9,1.2222222222222223,1.3333333333333333,31,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geoconvert,11,12,0.9166666666666666,1.4166666666666667,49,0,0 +/docs/4.2/administration/harperdb-studio/manage-schemas-browse-data,11,12,0.9166666666666666,1.5,44,0,0 +/docs/4.2/administration/harperdb-studio/organizations,11,11,1,0.7272727272727273,46,0,0 +/docs/4.2/developers/components/operations,11,12,0.9166666666666666,2.6666666666666665,45,0,0 +/docs/4.2/reference/analytics,11,9,1.2222222222222223,1.6666666666666667,37,0,0 +/docs/4.3/administration/harperdb-studio/instances,11,11,1,0.18181818181818182,39,0,0 +/docs/4.3/developers/applications/caching,11,11,1,0.45454545454545453,41,0,0 +/docs/4.3/developers/components/writing-extensions,11,11,1,1.2727272727272727,42,0,0 +/docs/4.3/developers/operations-api/databases-and-tables,11,9,1.2222222222222223,13.555555555555555,36,0,0 +/docs/4.5/administration/logging,11,3,3.6666666666666665,9.666666666666666,11,0,0 +/docs/4.5/developers/applications/debugging,11,8,1.375,25.125,33,0,0 +/docs/4.5/reference/data-types,11,5,2.2,11.8,19,0,0 +/docs/4.6/deployments/harper-cli,11,6,1.8333333333333333,152.66666666666666,25,0,0 
+/docs/4.6/developers/operations-api/certificate-management,11,5,2.2,7.6,19,0,0 +/docs/4.6/reference/blob,11,7,1.5714285714285714,2.2857142857142856,27,0,0 +/docs/developers/applications/,11,9,1.2222222222222223,19.444444444444443,23,0,0 +/docs/next/getting-started/quickstart,11,4,2.75,10,38,0,0 +/docs/v/4.1/security/jwt-auth,11,11,1,0.8181818181818182,45,0,0 +/docs/v/4.2/developers/components/writing-extensions,11,12,0.9166666666666666,3.4166666666666665,47,0,0 +/docs/v/4.2/technical-details/reference/globals,11,11,1,4,47,0,0 +/release-notes/v3-monkey/3.1.4,11,11,1,1.9090909090909092,50,0,0 +/release-notes/v3-monkey/3.1.5,11,11,1,0.8181818181818182,47,0,0 +/release-notes/v4-tucker/4.0.6,11,11,1,2.1818181818181817,44,0,0 +/release-notes/v4-tucker/4.2.1,11,11,1,1.7272727272727273,48,0,0 +/release-notes/v4-tucker/4.2.6,11,11,1,0.2727272727272727,45,0,0 +/release-notes/v4-tucker/4.3.33,11,9,1.2222222222222223,1.1111111111111112,32,0,0 +/release-notes/v4-tucker/4.3.38,11,9,1.2222222222222223,1,33,0,0 +/release-notes/v4-tucker/4.3.5,11,10,1.1,1.5,39,0,0 +/release-notes/v4-tucker/4.4.15,11,9,1.2222222222222223,5.111111111111111,35,0,0 +/release-notes/v4-tucker/4.4.16,11,9,1.2222222222222223,0.5555555555555556,31,0,0 +/release-notes/v4-tucker/4.5.10,11,8,1.375,0.5,27,0,0 +/release-notes/v4-tucker/4.6.24,11,5,2.2,4.8,16,0,0 +/release-notes/v4-tucker/4.6.6,11,7,1.5714285714285714,3.142857142857143,20,0,0 +/docs/4.1/configuration,10,10,1,2.3,42,0,0 +/docs/4.1/custom-functions/templates,10,9,1.1111111111111112,0.8888888888888888,29,0,0 +/docs/4.1/harperdb-studio/manage-charts,10,10,1,3.1,38,0,0 +/docs/4.1/security/basic-auth,10,9,1.1111111111111112,2.5555555555555554,39,0,0 +/docs/4.2/developers/applications,10,9,1.1111111111111112,2.6666666666666665,31,0,0 +/docs/4.2/developers/components/sdks,10,8,1.25,9.875,33,0,0 +/docs/4.2/developers/security,10,10,1,0.9,37,0,0 +/docs/4.2/developers/security/basic-auth,10,8,1.25,7.875,34,0,0 
+/docs/4.3/administration/harperdb-studio/manage-charts,10,11,0.9090909090909091,2.3636363636363638,43,0,0 +/docs/4.3/administration/harperdb-studio/manage-replication,10,10,1,3.7,42,0,0 +/docs/4.3/developers/applications/example-projects,10,8,1.25,7.5,28,0,0 +/docs/4.3/developers/components/sdks,10,10,1,1,40,0,0 +/docs/4.3/reference/globals,10,10,1,2.1,40,0,0 +/docs/4.4/administration/harper-studio/manage-instance-roles,10,7,1.4285714285714286,6,30,0,0 +/docs/4.4/developers/clustering/naming-a-node,10,5,2,2.2,27,0,0 +/docs/4.4/reference/roles,10,8,1.25,27.375,29,0,0 +/docs/4.5/foundations/harper-architecture,10,2,5,21.5,13,0,0 +/docs/4.6/administration/harper-studio/organizations,10,9,1.1111111111111112,5.555555555555555,34,0,0 +/docs/4.6/administration/logging/audit-logging,10,5,2,4.8,23,0,0 +/docs/4.6/developers/applications/data-loader,10,4,2.5,3.25,10,0,0 +/docs/4.6/developers/security,10,8,1.25,7.875,28,0,0 +/docs/4.6/developers/security/configuration,10,3,3.3333333333333335,106.33333333333333,16,0,0 +/docs/4.6/getting-started/quickstart,10,8,1.25,3.125,23,0,0 +/docs/4.6/reference/components/built-in-extensions,10,4,2.5,22,15,0,0 +/docs/4.6/reference/components/plugins,10,3,3.3333333333333335,34.666666666666664,17,1,0 +/docs/reference/clustering/certificate-management,10,8,1.25,4.375,26,1,0 +/docs/v/4.2/developers/sql-guide/sql-geospatial-functions,10,10,1,1.8,43,0,0 +/release-notes/v2-penny/2.2.3,10,10,1,0.8,43,0,0 +/release-notes/v3-monkey/3.1.3,10,10,1,0.6,43,0,0 +/release-notes/v4-tucker/4.0.3,10,10,1,0.9,41,0,0 +/release-notes/v4-tucker/4.1.2,10,10,1,0.8,40,0,0 +/release-notes/v4-tucker/4.4.12,10,7,1.4285714285714286,1.8571428571428572,25,0,0 +/release-notes/v4-tucker/4.4.20,10,9,1.1111111111111112,0.7777777777777778,31,0,0 +/release-notes/v4-tucker/4.4.21,10,8,1.25,0.375,28,0,0 +/release-notes/v4-tucker/4.4.4,10,8,1.25,1.625,28,0,0 +/release-notes/v4-tucker/4.7.16,10,4,2.5,4.75,13,0,0 +/release-notes/v4-tucker/4.7.7,10,5,2,2.2,12,0,0 
+/release-notes/v4-tucker/4.7.8,10,7,1.4285714285714286,0.42857142857142855,17,0,0 +/docs/4.1/custom-functions/custom-functions-operations,9,8,1.125,3,32,0,0 +/docs/4.1/harperdb-studio/manage-functions,9,9,1,1.3333333333333333,35,0,0 +/docs/4.2/,9,9,1,0.5555555555555556,32,0,0 +/docs/4.2/administration/harperdb-studio,9,9,1,3.888888888888889,38,0,0 +/docs/4.2/administration/harperdb-studio/instances,9,9,1,0.4444444444444444,31,0,0 +/docs/4.2/administration/logging/transaction-logging,9,9,1,1.8888888888888888,32,0,0 +/docs/4.2/deployments/harperdb-cloud/iops-impact,9,9,1,1.6666666666666667,39,0,0 +/docs/4.2/developers/applications/caching,9,9,1,0.5555555555555556,34,0,0 +/docs/4.2/developers/operations-api/utilities,9,9,1,0.7777777777777778,36,0,0 +/docs/4.3/developers/components/installing,9,9,1,2.5555555555555554,35,0,0 +/docs/4.3/developers/security/basic-auth,9,9,1,2.2222222222222223,39,0,0 +/docs/4.3/developers/security/users-and-roles,9,9,1,2.3333333333333335,34,0,0 +/docs/4.3/reference/resource,9,6,1.5,60.5,22,2,0 +/docs/4.5/administration/compact,9,3,3,1,17,0,0 +/docs/4.5/administration/harper-studio/manage-instance-roles,9,9,1,2.6666666666666665,39,0,0 +/docs/4.5/administration/logging/standard-logging,9,2,4.5,43.5,9,0,0 +/docs/4.5/developers/components/reference,9,5,1.8,10,13,0,0 +/docs/4.5/developers/operations-api/clustering-nats,9,3,3,10.333333333333334,11,0,0 +/docs/4.5/reference/sql-guide,9,5,1.8,1.4,22,0,0 +/docs/4.6/administration/logging/standard-logging,9,4,2.25,11.75,18,0,0 +/docs/4.6/developers/operations-api/custom-functions,9,4,2.25,9.25,13,0,0 +/docs/4.6/foundations/harper-architecture,9,2,4.5,2,12,0,0 +/docs/4.6/reference/resources/migration,9,3,3,37,16,0,0 +/docs/4.6/reference/resources/query-optimization,9,2,4.5,162.5,10,0,0 +/docs/4.6/reference/roles,9,4,2.25,1.25,12,0,0 +/docs/administration/harperdb-studio/,9,9,1,0.1111111111111111,36,0,0 +/docs/deployments,9,9,1,2.5555555555555554,34,0,0 
+/docs/deployments/install-harper/,9,6,1.5,40.666666666666664,19,0,0 +/docs/developers/sql-guide/reserved-word,9,5,1.8,7.4,13,0,0 +/docs/reference/clustering/naming-a-node,9,6,1.5,7.666666666666667,21,0,0 +/release-notes/v4-tucker/4.0.4,9,10,0.9,0.9,40,0,0 +/release-notes/v4-tucker/4.2.4,9,9,1,1.1111111111111112,35,0,0 +/release-notes/v4-tucker/4.3.14,9,9,1,2.2222222222222223,35,0,0 +/release-notes/v4-tucker/4.3.4,9,7,1.2857142857142858,1,25,0,0 +/release-notes/v4-tucker/4.4.23,9,8,1.125,3,27,0,0 +/docs/4.1/harperdb-cloud/iops-impact,8,8,1,2,33,0,0 +/docs/4.1/harperdb-studio/instance-example-code,8,7,1.1428571428571428,0.8571428571428571,25,0,0 +/docs/4.1/harperdb-studio/query-instance-data,8,8,1,4.625,34,0,0 +/docs/4.1/logging,8,8,1,2,30,0,0 +/docs/4.1/security/users-and-roles,8,7,1.1428571428571428,2.7142857142857144,28,1,0 +/docs/4.2/administration,8,8,1,1.75,27,0,0 +/docs/4.2/administration/harperdb-studio/manage-clustering,8,8,1,1.625,34,0,0 +/docs/4.2/deployments,8,8,1,3.875,32,0,0 +/docs/4.2/deployments/harperdb-cloud,8,8,1,0,30,0,0 +/docs/4.2/developers/components/drivers,8,7,1.1428571428571428,1,24,0,0 +/docs/4.2/developers/operations-api/nosql-operations,8,7,1.1428571428571428,56.42857142857143,28,0,0 +/docs/4.2/developers/security/configuration,8,8,1,0.25,32,0,0 +/docs/4.3/administration/harperdb-studio,8,7,1.1428571428571428,3,30,0,0 +/docs/4.3/deployments/harperdb-cloud,8,8,1,0.5,35,0,0 +/docs/4.3/developers/sql-guide,8,8,1,1,32,0,0 +/docs/4.4/administration/harper-studio/organizations,8,8,1,0.875,31,0,0 +/docs/4.4/deployments/install-harper/,8,8,1,0.75,33,0,0 +/docs/4.4/deployments/upgrade-hdb-instance,8,8,1,2.25,30,0,0 +/docs/4.5/developers/applications/defining-roles,8,5,1.6,24.6,14,0,0 +/docs/4.5/developers/applications/web-applications,8,6,1.3333333333333333,5.333333333333333,23,1,0 +/docs/4.5/developers/security/certificate-management,8,3,2.6666666666666665,7.666666666666667,20,0,0 +/docs/4.5/getting-started/,8,4,2,0.5,20,0,0 
+/docs/4.6/administration,8,6,1.3333333333333333,0.16666666666666666,15,0,0 +/docs/4.6/administration/harper-studio/manage-applications,8,7,1.1428571428571428,6.714285714285714,26,0,0 +/docs/4.6/deployments/install-harper,8,5,1.6,6.8,14,0,0 +/docs/4.6/developers/applications/defining-roles,8,2,4,36.5,13,0,0 +/docs/4.6/developers/security/jwt-auth,8,3,2.6666666666666665,208.33333333333334,17,1,0 +/docs/4.6/developers/security/mtls-auth,8,8,1,2.75,36,0,0 +/docs/4.6/reference/components/configuration,8,3,2.6666666666666665,8,10,0,0 +/docs/developers/clustering/subscription-overview,8,6,1.3333333333333333,13.5,24,1,0 +/docs/developers/sql-guide/json-search,8,4,2,3.75,9,1,0 +/docs/getting-started/what-is-harper,8,6,1.3333333333333333,0.6666666666666666,28,0,0 +/release-notes/v4-tucker/4.3.11,8,6,1.3333333333333333,2,22,0,0 +/release-notes/v4-tucker/4.6.7,8,5,1.6,2.6,13,0,0 +/release-notes/v4-tucker/4.7.17,8,4,2,0.75,13,0,0 +/docs/4.1/,7,7,1,2.857142857142857,31,0,0 +/docs/4.1/sql-guide/insert,7,7,1,1.5714285714285714,33,0,0 +/docs/4.1/sql-guide/joins,7,7,1,1.1428571428571428,27,0,0 +/docs/4.1/sql-guide/reserved-word,7,7,1,2.2857142857142856,26,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geodifference,7,7,1,2.5714285714285716,33,0,0 +/docs/4.2/administration/harperdb-studio/manage-functions,7,7,1,1.7142857142857142,32,0,0 +/docs/4.2/administration/harperdb-studio/manage-instance-roles,7,7,1,3.2857142857142856,29,0,0 +/docs/4.2/developers/applications/debugging,7,7,1,0.7142857142857143,28,0,0 +/docs/4.3/administration/harperdb-studio/manage-databases-browse-data,7,7,1,0.8571428571428571,28,0,0 +/docs/4.3/developers/operations-api/advanced-json-sql-examples,7,7,1,4.285714285714286,30,0,0 +/docs/4.3/developers/operations-api/token-authentication,7,5,1.4,1.2,23,1,0 +/docs/4.3/reference/data-types,7,6,1.1666666666666667,2.8333333333333335,22,0,0 +/docs/4.5/administration/harper-studio/instance-configuration,7,7,1,0.2857142857142857,29,0,0 
+/docs/4.5/administration/harper-studio/manage-replication,7,7,1,2.142857142857143,26,0,0 +/docs/4.5/developers/applications/defining-schemas,7,3,2.3333333333333335,27.333333333333332,10,0,0 +/docs/4.5/developers/operations-api/jobs,7,2,3.5,126,9,0,0 +/docs/4.5/developers/rest,7,3,2.3333333333333335,2.6666666666666665,12,0,0 +/docs/4.5/reference/analytics,7,4,1.75,2,15,0,0 +/docs/4.5/reference/content-types,7,3,2.3333333333333335,1.3333333333333333,7,0,0 +/docs/4.5/reference/dynamic-schema,7,4,1.75,0.25,11,0,0 +/docs/4.5/reference/headers,7,4,1.75,3.5,14,0,0 +/docs/4.5/reference/limits,7,3,2.3333333333333335,7,11,0,0 +/docs/4.5/reference/query-optimization,7,2,3.5,5.5,7,0,0 +/docs/4.5/reference/transactions,7,5,1.4,1,17,0,0 +/docs/4.6/deployments/upgrade-hdb-instance,7,5,1.4,2,19,0,0 +/docs/4.6/developers/applications/debugging,7,4,1.75,1.75,13,0,0 +/docs/4.6/developers/operations-api/clustering-nats,7,3,2.3333333333333335,13.333333333333334,13,0,0 +/docs/4.6/developers/operations-api/sql-operations,7,3,2.3333333333333335,1.3333333333333333,13,0,0 +/docs/4.6/foundations/core-concepts,7,3,2.3333333333333335,5,10,0,0 +/docs/4.6/reference/clustering,7,1,7,78,7,0,0 +/docs/administration/harperdb-studio,7,8,0.875,5.875,32,0,0 +/docs/deployments/,7,7,1,0.42857142857142855,29,0,0 +/docs/getting-started/first-harper-app,7,3,2.3333333333333335,0,18,0,0 +/release-notes/v4-tucker/4.3.12,7,6,1.1666666666666667,0.8333333333333334,24,0,0 +/release-notes/v4-tucker/4.3.13,7,6,1.1666666666666667,0.5,22,0,0 +/release-notes/v4-tucker/4.3.15,7,6,1.1666666666666667,0.3333333333333333,22,0,0 +/release-notes/v4-tucker/4.3.6,7,6,1.1666666666666667,3.5,24,0,0 +/release-notes/v4-tucker/4.4.2,7,6,1.1666666666666667,3.6666666666666665,21,0,0 +/release-notes/v4-tucker/4.6.5,7,4,1.75,2,11,0,0 +/release-notes/v4-tucker/4.7.4,7,5,1.4,2.8,17,1,0 +/docs/4.1/add-ons-and-sdks,6,4,1.5,2.25,15,0,0 +/docs/4.1/clustering/certificate-management,6,6,1,1.8333333333333333,25,0,0 
+/docs/4.1/custom-functions/using-npm-git,6,7,0.8571428571428571,1.2857142857142858,27,0,0 +/docs/4.1/harperdb-studio/login-password-reset,6,6,1,0.5,19,0,0 +/docs/4.1/harperdb-studio/manage-clustering,6,5,1.2,0.2,17,0,0 +/docs/4.1/security/certificate-management,6,6,1,0.3333333333333333,23,0,0 +/docs/4.1/sql-guide/features-matrix,6,6,1,4,22,0,0 +/docs/4.2/administration/logging/audit-logging,6,6,1,1.5,26,0,0 +/docs/4.2/deployments/,6,6,1,0.3333333333333333,25,0,0 +/docs/4.2/developers/clustering/things-worth-knowing,6,6,1,0.5,24,0,0 +/docs/4.2/developers/components/google-data-studio,6,6,1,1.5,29,0,0 +/docs/4.2/developers/operations-api,6,6,1,0.8333333333333334,26,0,0 +/docs/4.2/developers/operations-api/jobs,6,6,1,0.16666666666666666,21,0,0 +/docs/4.2/developers/operations-api/sql-operations,6,5,1.2,9.6,18,0,0 +/docs/4.2/reference/headers,6,6,1,4.333333333333333,24,0,0 +/docs/4.3/administration,6,6,1,1.5,23,0,0 +/docs/4.3/administration/compact,6,6,1,2.6666666666666665,24,0,0 +/docs/4.3/administration/harperdb-studio/manage-applications,6,6,1,2.6666666666666665,23,0,0 +/docs/4.3/developers/applications/define-routes,6,6,1,1,23,0,0 +/docs/4.3/developers/clustering/things-worth-knowing,6,6,1,0.5,24,0,0 +/docs/4.3/developers/operations-api/jobs,6,5,1.2,0.4,16,0,0 +/docs/4.3/developers/security/configuration,6,6,1,2.3333333333333335,27,0,0 +/docs/4.3/reference/transactions,6,5,1.2,5.8,17,1,0 +/docs/4.4/administration/harper-studio/manage-instance-users,6,3,2,9,13,0,0 +/docs/4.4/administration/logging/transaction-logging,6,6,1,3.3333333333333335,26,0,0 +/docs/4.4/deployments/configuration,6,4,1.5,3,16,0,0 +/docs/4.4/deployments/install-harper,6,5,1.2,0.6,20,0,0 +/docs/4.4/developers/applications/caching,6,5,1.2,21.6,16,0,0 +/docs/4.5/administration/harper-studio,6,4,1.5,2.25,11,0,0 +/docs/4.5/developers/clustering,6,5,1.2,0.2,16,0,0 +/docs/4.5/developers/operations-api/users-and-roles,6,3,2,2.6666666666666665,7,0,0 +/docs/4.5/developers/real-time,6,3,2,54,13,0,0 
+/docs/4.6/administration/harper-studio/manage-replication,6,6,1,0.5,25,0,0 +/docs/4.6/deployments/harper-cloud/instance-size-hardware-specs,6,6,1,4,23,0,0 +/docs/4.6/deployments/install-harper/linux,6,4,1.5,2.75,14,0,0 +/docs/4.6/developers/operations-api/bulk-operations,6,3,2,3.6666666666666665,10,0,0 +/docs/4.6/developers/operations-api/jobs,6,2,3,2.5,9,0,0 +/docs/4.6/developers/operations-api/quickstart-examples,6,4,1.5,43,13,0,0 +/docs/4.6/developers/operations-api/registration,6,4,1.5,3.25,17,0,0 +/docs/4.6/developers/security/users-and-roles,6,6,1,1.1666666666666667,19,0,0 +/docs/4.6/reference/architecture,6,5,1.2,11.2,9,1,0 +/docs/4.6/reference/components,6,4,1.5,0.25,8,0,0 +/docs/4.6/reference/headers,6,5,1.2,1.4,19,0,0 +/docs/administration/harper-studio/manage-replication,6,5,1.2,1.6,18,0,0 +/docs/deployments/harper-cloud/,6,7,0.8571428571428571,6.714285714285714,27,0,0 +/docs/developers/clustering/naming-a-node,6,4,1.5,4.25,14,0,0 +/docs/reference/clustering/requirements-and-definitions,6,5,1.2,4.2,20,0,0 +/docs/reference/sql-guide/reserved-word,6,4,1.5,17.5,15,1,0 +/docs/v/4.1/add-ons-and-sdks,6,6,1,2.3333333333333335,25,0,0 +/release-notes/v4-tucker/4.3.3,6,5,1.2,0.8,19,0,0 +/release-notes/v4-tucker/4.6.4,6,4,1.5,1.25,12,0,0 +/docs/4.1/clustering,5,5,1,14.2,19,0,0 +/docs/4.1/clustering/enabling-clustering,5,5,1,0.4,19,0,0 +/docs/4.1/clustering/managing-subscriptions,5,5,1,0.6,20,0,0 +/docs/4.1/clustering/things-worth-knowing,5,3,1.6666666666666667,2,13,0,0 +/docs/4.1/custom-functions/debugging-custom-function,5,5,1,2.4,21,0,0 +/docs/4.1/custom-functions/host-static,5,5,1,4.4,24,0,0 +/docs/4.1/custom-functions/restarting-server,5,5,1,1.6,21,0,0 +/docs/4.1/harperdb-studio/enable-mixed-content,5,5,1,0.6,20,0,0 +/docs/4.1/harperdb-studio/manage-schemas-browse-data,5,5,1,2.2,19,0,0 +/docs/4.1/reference/storage-algorithm,5,5,1,1.2,20,0,0 +/docs/4.1/security/configuration,5,5,1,4,21,0,0 +/docs/4.1/security/jwt-auth,5,5,1,1.6,22,0,0 
+/docs/4.1/sql-guide/functions,5,5,1,0.8,21,0,0 +/docs/4.1/transaction-logging,5,5,1,0.4,18,0,0 +/docs/4.1/upgrade-hdb-instance,5,5,1,0.2,19,0,0 +/docs/4.2/administration/harperdb-studio/instance-configuration,5,5,1,0,20,0,0 +/docs/4.2/deployments/harperdb-cli,5,4,1.25,21.5,16,0,0 +/docs/4.2/deployments/install-harperdb/,5,5,1,0.4,18,0,0 +/docs/4.2/deployments/upgrade-hdb-instance,5,5,1,2,22,0,0 +/docs/4.2/developers/clustering/creating-a-cluster-user,5,5,1,1.4,21,0,0 +/docs/4.2/developers/clustering/establishing-routes,5,5,1,1.6,24,0,0 +/docs/4.2/developers/clustering/naming-a-node,5,5,1,0,20,0,0 +/docs/4.2/developers/components,5,5,1,0,18,0,0 +/docs/4.2/developers/components/installing,5,5,1,0.8,18,0,0 +/docs/4.2/developers/operations-api/bulk-operations,5,5,1,0.8,19,0,0 +/docs/4.2/developers/operations-api/components,5,5,1,0.8,19,0,0 +/docs/4.2/developers/operations-api/logs,5,5,1,1,19,0,0 +/docs/4.2/reference/dynamic-schema,5,5,1,1.2,18,0,0 +/docs/4.2/reference/resource,5,4,1.25,0.75,17,0,0 +/docs/4.2/technical-details/reference/headers,5,5,1,1.6,20,0,0 +/docs/4.3/administration/harperdb-studio/login-password-reset,5,4,1.25,2.25,16,0,0 +/docs/4.3/administration/harperdb-studio/manage-instance-roles,5,5,1,2,23,0,0 +/docs/4.3/administration/harperdb-studio/manage-instance-users,5,5,1,0.8,22,0,0 +/docs/4.3/administration/logging,5,4,1.25,2.75,15,0,0 +/docs/4.3/developers/applications/debugging,5,5,1,0,20,0,0 +/docs/4.3/developers/clustering/naming-a-node,5,5,1,0.8,21,0,0 +/docs/4.3/developers/clustering/requirements-and-definitions,5,5,1,0.4,22,0,0 +/docs/4.3/developers/operations-api/logs,5,2,2.5,0.5,8,0,0 +/docs/4.3/developers/security/jwt-auth,5,4,1.25,0.25,14,0,0 +/docs/4.4/,5,5,1,1.8,18,0,0 +/docs/4.4/administration/harper-studio/manage-applications,5,5,1,1.6,21,0,0 +/docs/4.4/administration/logging/standard-logging,5,4,1.25,0.75,13,0,0 +/docs/4.4/developers/components/built-in,5,5,1,1.8,20,0,0 +/docs/4.4/developers/components/managing,5,5,1,3,23,0,0 
+/docs/4.4/developers/operations-api/databases-and-tables,5,3,1.6666666666666667,6.666666666666667,11,0,0 +/docs/4.4/developers/operations-api/nosql-operations,5,4,1.25,2.75,16,0,0 +/docs/4.4/developers/real-time,5,4,1.25,60.5,12,0,0 +/docs/4.4/developers/security/mtls-auth,5,3,1.6666666666666667,5.333333333333333,15,0,0 +/docs/4.5/administration,5,4,1.25,3.25,11,0,0 +/docs/4.5/administration/cloning,5,5,1,0.6,15,0,0 +/docs/4.5/developers/clustering/naming-a-node,5,4,1.25,0.25,15,0,0 +/docs/4.5/developers/operations-api/quickstart-examples,5,3,1.6666666666666667,5.333333333333333,13,0,0 +/docs/4.5/developers/replication,5,3,1.6666666666666667,7.333333333333333,8,0,0 +/docs/4.5/developers/security,5,4,1.25,0,11,0,0 +/docs/4.5/foundations/core-concepts,5,1,5,194,5,0,0 +/docs/4.6/administration/harper-studio/enable-mixed-content,5,4,1.25,1,15,0,0 +/docs/4.6/administration/harper-studio/instance-configuration,5,5,1,0.8,21,0,0 +/docs/4.6/administration/harper-studio/instances,5,5,1,2.4,19,0,0 +/docs/4.6/developers/applications/define-routes,5,4,1.25,0.75,14,0,0 +/docs/4.6/developers/applications/example-projects,5,5,1,1.8,21,0,0 +/docs/4.6/developers/operations-api/advanced-json-sql-examples,5,4,1.25,4.5,11,0,0 +/docs/4.6/developers/sql-guide,5,3,1.6666666666666667,23.333333333333332,8,0,0 +/docs/4.6/getting-started,5,6,0.8333333333333334,13,29,0,0 +/docs/4.6/reference/clustering/subscription-overview,5,2,2.5,22,7,0,0 +/docs/4.6/reference/content-types,5,3,1.6666666666666667,13,9,0,0 +/docs/4.6/reference/graphql,5,3,1.6666666666666667,3.6666666666666665,8,0,0 +/docs/4.6/reference/transactions,5,2,2.5,7.5,5,0,0 +/docs/4.6/technical-details/,5,5,1,1,23,0,0 +/docs/administration/harperdb-studio/instances,5,5,1,0,20,0,0 +/docs/developers/clustering/,5,5,1,2,24,0,0 +/docs/developers/clustering/requirements-and-definitions,5,4,1.25,0.25,14,0,0 +/learn/,5,4,1.25,2.25,12,0,0 +/release-notes/v4-tucker/4.3.2,5,4,1.25,1,15,0,0 
+/docs/4.1/add-ons-and-sdks/google-data-studio,4,4,1,1.5,16,0,0 +/docs/4.1/audit-logging,4,4,1,2.5,18,0,0 +/docs/4.1/clustering/creating-a-cluster-user,4,3,1.3333333333333333,17.333333333333332,15,0,0 +/docs/4.1/clustering/naming-a-node,4,5,0.8,0.4,18,0,0 +/docs/4.1/clustering/requirements-and-definitions,4,4,1,0,15,0,0 +/docs/4.1/custom-functions,4,4,1,0.75,16,0,0 +/docs/4.1/harperdb-studio/manage-instance-roles,4,4,1,0.75,16,0,0 +/docs/4.1/reference/content-types,4,4,1,2.75,17,0,0 +/docs/4.1/sql-guide,4,4,1,0,16,0,0 +/docs/4.1/sql-guide/date-functions,4,4,1,0,16,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geocontains,4,4,1,2.5,19,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geodistance,4,4,1,2,19,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geoequal,4,4,1,0.5,17,0,0 +/docs/4.2/administration/harperdb-studio/instance-example-code,4,4,1,0.5,16,0,0 +/docs/4.2/administration/harperdb-studio/instance-metrics,4,3,1.3333333333333333,9.333333333333334,12,0,0 +/docs/4.2/administration/harperdb-studio/manage-instance-users,4,4,1,1.25,17,0,0 +/docs/4.2/deployments/install-harperdb/linux,4,4,1,1.5,15,0,0 +/docs/4.2/developers/applications/,4,4,1,9.75,14,0,0 +/docs/4.2/developers/clustering/requirements-and-definitions,4,4,1,1.25,17,0,0 +/docs/4.2/developers/operations-api/registration,4,3,1.3333333333333333,18,9,0,0 +/docs/4.2/developers/security/certificate-management,4,4,1,0,14,0,0 +/docs/4.2/developers/sql-guide/date-functions,4,4,1,0,16,0,0 +/docs/4.2/reference,4,3,1.3333333333333333,9.333333333333334,10,0,0 +/docs/4.2/release-notes/4.tucker/4.0.6,4,4,1,0.75,16,0,0 +/docs/4.3/administration/logging/audit-logging,4,4,1,1,16,0,0 +/docs/4.3/deployments/harperdb-cloud/alarms,4,4,1,0,15,0,0 +/docs/4.3/developers/clustering/certificate-management,4,4,1,0.75,17,0,0 +/docs/4.3/developers/clustering/creating-a-cluster-user,4,4,1,0.5,17,0,0 +/docs/4.3/developers/clustering/subscription-overview,4,4,1,0,15,0,0 
+/docs/4.3/developers/components/operations,4,4,1,2.5,17,0,0 +/docs/4.3/developers/operations-api/users-and-roles,4,4,1,0,9,0,0 +/docs/4.3/developers/rest,4,4,1,1.25,18,0,0 +/docs/4.3/developers/security/certificate-management,4,4,1,0,16,0,0 +/docs/4.3/developers/sql-guide/functions,4,4,1,0,16,0,0 +/docs/4.3/reference/analytics,4,3,1.3333333333333333,30,11,1,0 +/docs/4.3/reference/architecture,4,4,1,0,14,0,0 +/docs/4.3/reference/clustering,4,3,1.3333333333333333,5.333333333333333,11,0,0 +/docs/4.3/technical-details/reference/storage-algorithm/,4,2,2,0,11,0,0 +/docs/4.4/administration/logging,4,1,4,14,4,0,0 +/docs/4.4/deployments/harper-cli,4,3,1.3333333333333333,7,10,0,0 +/docs/4.4/deployments/harper-cloud/verizon-5g-wavelength-instances,4,4,1,0,14,0,0 +/docs/4.4/deployments/install-harper/linux,4,4,1,3.25,17,0,0 +/docs/4.4/developers/applications/,4,4,1,6.5,20,0,0 +/docs/4.4/developers/applications/web-applications,4,4,1,0,17,0,0 +/docs/4.4/developers/clustering,4,4,1,2,16,0,0 +/docs/4.4/developers/operations-api/,4,4,1,1.25,18,0,0 +/docs/4.4/developers/security/users-and-roles,4,4,1,0.25,9,0,0 +/docs/4.4/foundations/harper-architecture,4,2,2,34.5,8,0,0 +/docs/4.4/foundations/use-cases,4,3,1.3333333333333333,0,10,0,0 +/docs/4.4/reference/architecture,4,4,1,0,13,0,0 +/docs/4.5/administration/harper-studio/create-account,4,4,1,2.5,16,0,0 +/docs/4.5/deployments/harper-cli,4,2,2,3,7,0,0 +/docs/4.5/deployments/install-harper/linux,4,4,1,1.5,14,0,0 +/docs/4.5/deployments/upgrade-hdb-instance,4,2,2,10,6,0,0 +/docs/4.5/developers/clustering/managing-subscriptions,4,4,1,1,18,0,0 +/docs/4.5/developers/operations-api/bulk-operations,4,2,2,1.5,8,0,0 +/docs/4.5/developers/operations-api/registration,4,3,1.3333333333333333,0.6666666666666666,11,0,0 +/docs/4.5/developers/operations-api/token-authentication,4,2,2,0,7,0,0 +/docs/4.5/getting-started/installation,4,3,1.3333333333333333,11.666666666666666,10,0,0 
+/docs/4.5/reference/architecture,4,3,1.3333333333333333,0.3333333333333333,6,0,0 +/docs/4.6/administration/cloning,4,3,1.3333333333333333,2,11,0,0 +/docs/4.6/administration/harper-studio/query-instance-data,4,3,1.3333333333333333,0,8,0,0 +/docs/4.6/developers/applications/web-applications,4,3,1.3333333333333333,0.6666666666666666,10,0,0 +/docs/4.6/developers/clustering,4,2,2,1,4,0,0 +/docs/4.6/developers/components,4,4,1,1.75,15,0,0 +/docs/4.6/developers/operations-api/token-authentication,4,3,1.3333333333333333,26.333333333333332,11,0,0 +/docs/4.6/developers/security/certificate-management,4,4,1,0.75,17,0,0 +/docs/4.6/reference/clustering/things-worth-knowing,4,1,4,56,5,0,0 +/docs/4.6/reference/limits,4,3,1.3333333333333333,1,8,0,0 +/docs/4.6/technical-details/reference,4,4,1,1.5,14,0,0 +/docs/4.6/technical-details/reference/data-types,4,4,1,2,14,0,0 +/docs/administration/harperdb-studio/organizations,4,4,1,0,15,0,0 +/docs/developers/clustering/establishing-routes,4,4,1,0.25,14,0,0 +/docs/developers/miscellaneous,4,4,1,2.25,17,0,0 +/docs/reference/sql-guide/features-matrix,4,3,1.3333333333333333,1,10,0,0 +/docs/v/4.1/release-notes,4,4,1,2.25,16,0,0 +/docs/4.1/clustering/establishing-routes,3,3,1,0.3333333333333333,12,0,0 +/docs/4.1/harperdb-cloud,3,3,1,0,11,0,0 +/docs/4.1/harperdb-studio/instance-configuration,3,3,1,2,14,0,0 +/docs/4.1/harperdb-studio/manage-instance-users,3,3,1,0.3333333333333333,13,0,0 +/docs/4.1/jobs,3,3,1,1.3333333333333333,12,0,0 +/docs/4.1/sql-guide/json-search,3,3,1,0,12,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geoarea,3,3,1,2.3333333333333335,13,0,0 +/docs/4.2/administration/harperdb-studio/,3,3,1,0.6666666666666666,12,0,0 +/docs/4.2/administration/harperdb-studio/login-password-reset,3,3,1,2,13,0,0 +/docs/4.2/administration/jobs,3,3,1,2.3333333333333335,11,0,0 +/docs/4.2/administration/logging,3,3,1,0.6666666666666666,13,0,0 +/docs/4.2/administration/logging/standard-logging,3,3,1,0.3333333333333333,10,0,0 
+/docs/4.2/deployments/configuration,3,3,1,3.6666666666666665,13,0,0 +/docs/4.2/deployments/harperdb-cloud/instance-size-hardware-specs,3,3,1,0,13,0,0 +/docs/4.2/developers/clustering/,3,3,1,0,12,0,0 +/docs/4.2/developers/clustering/subscription-overview,3,3,1,0.3333333333333333,13,0,0 +/docs/4.2/developers/sql-guide/reserved-word,3,3,1,1.3333333333333333,13,0,0 +/docs/4.2/reference/clustering/creating-a-cluster-user,3,3,1,0,9,0,0 +/docs/4.2/reference/limits,3,3,1,2.6666666666666665,13,0,0 +/docs/4.2/reference/sql-guide,3,2,1.5,37,6,0,0 +/docs/4.2/reference/sql-guide/json-search,3,3,1,0,10,0,0 +/docs/4.2/reference/storage-algorithm,3,3,1,1,11,0,0 +/docs/4.2/technical-details/reference/storage-algorithm,3,3,1,3,15,0,0 +/docs/4.3/,3,3,1,1,12,0,0 +/docs/4.3/administration/cloning,3,3,1,1,13,0,0 +/docs/4.3/administration/harperdb-studio/enable-mixed-content,3,3,1,0,12,0,0 +/docs/4.3/administration/jobs,3,3,1,1,12,0,0 +/docs/4.3/deployments/harperdb-cloud/instance-size-hardware-specs,3,3,1,0.6666666666666666,13,0,0 +/docs/4.3/developers/components/google-data-studio,3,3,1,1,13,0,0 +/docs/4.3/developers/sql-guide/json-search,3,3,1,1,13,0,0 +/docs/4.3/reference/dynamic-schema,3,3,1,5.333333333333333,14,0,0 +/docs/4.3/reference/headers,3,3,1,0.3333333333333333,13,0,0 +/docs/4.3/technical-details/reference/globals,3,3,1,1.6666666666666667,13,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/1.3.0,3,3,1,1.3333333333333333,14,0,0 +/docs/4.4/administration/cloning,3,2,1.5,0.5,7,0,0 +/docs/4.4/administration/harper-studio,3,3,1,0,12,0,0 +/docs/4.4/administration/harper-studio/instance-configuration,3,3,1,0.6666666666666666,13,0,0 +/docs/4.4/administration/harper-studio/login-password-reset,3,3,1,2,12,0,0 +/docs/4.4/deployments/harper-cloud/,3,3,1,0,12,0,0 +/docs/4.4/deployments/harper-cloud/instance-size-hardware-specs,3,3,1,0.3333333333333333,12,0,0 +/docs/4.4/deployments/harper-cloud/iops-impact,3,3,1,0,11,0,0 +/docs/4.4/developers/applications/debugging,3,3,1,0,11,0,0 
+/docs/4.4/developers/clustering/subscription-overview,3,3,1,0,12,0,0 +/docs/4.4/developers/clustering/things-worth-knowing,3,3,1,0.6666666666666666,14,0,0 +/docs/4.4/developers/components,3,3,1,0,10,0,0 +/docs/4.4/developers/components/reference,3,3,1,0.3333333333333333,12,0,0 +/docs/4.4/developers/operations-api/advanced-json-sql-examples,3,3,1,2.6666666666666665,12,0,0 +/docs/4.4/developers/operations-api/clustering,3,2,1.5,6.5,9,0,0 +/docs/4.4/developers/operations-api/logs,3,3,1,1,13,0,0 +/docs/4.4/developers/replication/,3,3,1,0.3333333333333333,13,0,0 +/docs/4.4/foundations/core-concepts,3,2,1.5,1.5,7,0,0 +/docs/4.4/getting-started/installation,3,3,1,0,12,0,0 +/docs/4.4/reference/analytics,3,3,1,0,9,0,0 +/docs/4.4/reference/globals,3,2,1.5,8.5,5,0,0 +/docs/4.4/reference/limits,3,3,1,0.6666666666666666,13,0,0 +/docs/4.4/reference/resource,3,1,3,20,4,0,0 +/docs/4.5/administration/harper-studio/instances,3,3,1,1.3333333333333333,13,0,0 +/docs/4.5/administration/harper-studio/manage-applications,3,3,1,0.6666666666666666,13,0,0 +/docs/4.5/administration/harper-studio/manage-databases-browse-data,3,3,1,5.333333333333333,12,0,0 +/docs/4.5/developers/clustering/requirements-and-definitions,3,3,1,0,9,0,0 +/docs/4.5/developers/operations-api/advanced-json-sql-examples,3,1,3,27,3,0,0 +/docs/4.5/developers/operations-api/nosql-operations,3,1,3,2,3,0,0 +/docs/4.5/developers/operations-api/sql-operations,3,2,1.5,0.5,6,0,0 +/docs/4.5/developers/security/jwt-auth,3,3,1,1,13,0,0 +/docs/4.5/developers/security/users-and-roles,3,3,1,0,7,0,0 +/docs/4.5/reference/graphql,3,3,1,0.3333333333333333,5,0,0 +/docs/4.6/administration/harper-studio/manage-instance-users,3,2,1.5,7.5,7,0,0 +/docs/4.6/developers/clustering/things-worth-knowing,3,3,1,0.3333333333333333,13,0,0 +/docs/4.6/developers/security/basic-auth,3,2,1.5,1,7,0,0 +/docs/4.6/getting-started/,3,3,1,11,14,0,0 +/docs/4.6/reference/clustering/certificate-management,3,1,3,4,3,0,0 
+/docs/4.6/reference/components/applications,3,2,1.5,0,5,0,0 +/docs/4.6/reference/data-types,3,1,3,12,3,0,0 +/docs/4.6/reference/dynamic-schema,3,2,1.5,1,3,0,0 +/docs/4.6/reference/sql-guide/functions,3,2,1.5,1,5,0,0 +/docs/4.6/reference/storage-algorithm,3,3,1,41.666666666666664,8,0,0 +/docs/administration/harperdb-studio/instance-metrics,3,3,1,0,11,0,0 +/docs/developers/components/sdks,3,3,1,2,13,0,0 +/docs/developers/replication/,3,3,1,4.333333333333333,15,0,0 +/docs/developers/replication/clustering/creating-a-cluster-user,3,3,1,0,12,0,0 +/docs/developers/replication/clustering/managing-subscriptions,3,3,1,0.3333333333333333,13,0,0 +/docs/next/developers/applications/defining-schemas,3,2,1.5,0.5,12,0,0 +/docs/sql-support,3,2,1.5,10,11,0,0 +/docs/v/4.2/developers/components/sdks,3,3,1,0.6666666666666666,12,0,0 +/fabric/rest-api,3,1,3,2,8,0,0 +/docs/4.1/custom-functions/requirements-definitions,2,2,1,0.5,8,0,0 +/docs/4.1/developers/operations-api/bulk-operations,2,2,1,7.5,10,0,0 +/docs/4.1/developers/real-time,2,2,1,0,7,0,0 +/docs/4.1/developers/security/users-and-roles,2,2,1,1.5,10,0,0 +/docs/4.1/harperdb-cloud/alarms,2,2,1,0.5,9,0,0 +/docs/4.1/reference,2,2,1,0,7,0,0 +/docs/4.1/reference/data-types,2,2,1,0.5,9,0,0 +/docs/4.1/release-notes,2,2,1,0,8,0,0 +/docs/4.1/release-notes/1.alby/1.1.0,2,2,1,0,8,0,0 +/docs/4.1/release-notes/3.monkey/3.2.1,2,2,1,0,8,0,0 +/docs/4.1/release-notes/4.tucker/4.0.2,2,2,1,0.5,9,0,0 +/docs/4.1/release-notes/4.tucker/4.0.3,2,2,1,0,8,0,0 +/docs/4.1/release-notes/4.tucker/4.0.5,2,2,1,1,9,0,0 +/docs/4.1/release-notes/4.tucker/4.0.6,2,2,1,0.5,9,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geonear,2,2,1,0,8,0,0 +/docs/4.1/sql-guide/update,2,2,1,0.5,9,0,0 +/docs/4.1/technical-details/reference/data-types,2,2,1,6,7,0,0 +/docs/4.1/technical-details/reference/globals,2,2,1,0.5,9,0,0 +/docs/4.2/administration/administration,2,2,1,0.5,9,0,0 +/docs/4.2/administration/harperdb-studio/enable-mixed-content,2,2,1,0,8,0,0 
+/docs/4.2/administration/harperdb-studio/manage-charts,2,2,1,0,8,0,0 +/docs/4.2/deployments/harperdb-cloud/alarms,2,2,1,0,8,0,0 +/docs/4.2/deployments/install-harperdb,2,2,1,3,8,0,0 +/docs/4.2/developers/clustering,2,2,1,1,9,0,0 +/docs/4.2/developers/operations-api/,2,2,1,5.5,9,0,0 +/docs/4.2/developers/operations-api/custom-functions,2,2,1,0,8,0,0 +/docs/4.2/developers/replication,2,2,1,0,8,0,0 +/docs/4.2/developers/sql-guide,2,2,1,0.5,9,0,0 +/docs/4.2/developers/sql-guide/features-matrix,2,2,1,1.5,9,0,0 +/docs/4.2/developers/sql-guide/functions,2,2,1,0,8,0,0 +/docs/4.2/reference/architecture,2,2,1,4,8,0,0 +/docs/4.2/reference/clustering,2,2,1,1,9,0,0 +/docs/4.2/technical-details/reference/content-types,2,2,1,0.5,9,0,0 +/docs/4.2/technical-details/reference/limits,2,2,1,0,8,0,0 +/docs/4.2/technical-details/reference/transactions,2,2,1,1.5,9,0,0 +/docs/4.3/administration/administration,2,2,1,1,10,0,0 +/docs/4.3/administration/harperdb-studio/,2,3,0.6666666666666666,3.6666666666666665,9,0,0 +/docs/4.3/administration/harperdb-studio/create-account,2,2,1,0,8,0,0 +/docs/4.3/administration/harperdb-studio/instance-metrics,2,2,1,0,8,0,0 +/docs/4.3/administration/harperdb-studio/query-instance-data,2,2,1,0.5,9,0,0 +/docs/4.3/administration/logging/logging,2,2,1,2.5,9,0,0 +/docs/4.3/developers/,2,2,1,0.5,9,0,0 +/docs/4.3/developers/replication,2,2,1,0,8,0,0 +/docs/4.3/developers/sql-guide/features-matrix,2,2,1,4,9,0,0 +/docs/4.3/harperdb-studio/instance-metrics,2,2,1,0.5,8,0,0 +/docs/4.3/reference,2,2,1,0,8,0,0 +/docs/4.3/reference/clustering/managing-subscriptions,2,2,1,1.5,6,0,0 +/docs/4.3/reference/clustering/naming-a-node,2,2,1,0,7,0,0 +/docs/4.3/reference/sql-guide,2,2,1,2.5,7,0,0 +/docs/4.3/reference/sql-guide/date-functions,2,2,1,6,8,0,0 +/docs/4.3/technical-details/reference/storage-algorithm,2,2,1,2,10,0,0 +/docs/4.3/technical-details/reference/transactions,2,2,1,0.5,9,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/1.1.0,2,2,1,1,9,0,0 
+/docs/4.3/technical-details/release-notes/4.tucker/1.2.0,2,2,1,0.5,9,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.2.3,2,2,1,0.5,9,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.1.0,2,2,1,0,8,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.2.1,2,2,1,0.5,9,0,0 +/docs/4.4/administration/compact,2,2,1,1,6,0,0 +/docs/4.4/administration/harper-studio/instances,2,3,0.6666666666666666,1,9,0,0 +/docs/4.4/administration/harper-studio/manage-databases-browse-data,2,2,1,0,8,0,0 +/docs/4.4/administration/harperdb-studio/instances,2,2,1,3,9,0,0 +/docs/4.4/administration/harperdb-studio/login-password-reset,2,2,1,1,9,0,0 +/docs/4.4/administration/harperdb-studio/query-instance-data,2,2,1,0.5,9,0,0 +/docs/4.4/deployments/,2,2,1,0,8,0,0 +/docs/4.4/developers/applications/define-routes,2,2,1,0,8,0,0 +/docs/4.4/developers/applications/defining-roles,2,2,1,0,7,0,0 +/docs/4.4/developers/applications/defining-schemas,2,2,1,0,8,0,0 +/docs/4.4/developers/components/operations,2,2,1,1,9,0,0 +/docs/4.4/developers/miscellaneous,2,2,1,0,7,0,0 +/docs/4.4/developers/operations-api/clustering-nats,2,2,1,6,7,0,0 +/docs/4.4/developers/operations-api/jobs,2,2,1,1.5,7,0,0 +/docs/4.4/developers/operations-api/quickstart-examples,2,2,1,2,8,0,0 +/docs/4.4/developers/operations-api/utilities,2,2,1,0.5,7,0,0 +/docs/4.4/developers/security,2,2,1,0,5,0,0 +/docs/4.4/developers/security/certificate-management,2,2,1,0,7,0,0 +/docs/4.4/getting-started/,2,1,2,0,4,0,0 +/docs/4.4/getting-started/quickstart,2,2,1,0.5,9,0,0 +/docs/4.4/reference,2,2,1,0,8,0,0 +/docs/4.4/reference/clustering/creating-a-cluster-user,2,2,1,0,7,0,0 +/docs/4.4/reference/query-optimization,2,2,1,18.5,5,0,0 +/docs/4.4/reference/sql-guide/reserved-word,2,2,1,0,7,0,0 +/docs/4.4/technical-details/reference/storage-algorithm,2,2,1,0,7,0,0 +/docs/4.4/technical-details/reference/transactions,2,2,1,0,8,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.2.1,2,2,1,0.5,7,0,0 
+/docs/4.4/technical-details/release-notes/4.tucker/4.4.25,2,2,1,1,9,0,0 +/docs/4.5/administration/harper-studio/login-password-reset,2,2,1,4.5,9,0,0 +/docs/4.5/deployments/harper-cloud/verizon-5g-wavelength-instances,2,2,1,4.5,9,0,0 +/docs/4.5/deployments/install-harper,2,1,2,3,2,0,0 +/docs/4.5/deployments/install-harper/,2,2,1,2,7,0,0 +/docs/4.5/developers/applications/define-routes,2,1,2,1,2,0,0 +/docs/4.5/developers/clustering/certificate-management,2,2,1,0,7,0,0 +/docs/4.5/developers/clustering/creating-a-cluster-user,2,2,1,0,8,0,0 +/docs/4.5/developers/clustering/subscription-overview,2,2,1,4.5,8,0,0 +/docs/4.5/developers/clustering/things-worth-knowing,2,2,1,7,6,0,0 +/docs/4.5/developers/security/configuration,2,2,1,1,9,0,0 +/docs/4.5/developers/sql-guide,2,1,2,0,2,0,0 +/docs/4.5/developers/sql-guide/date-functions,2,2,1,2,10,0,0 +/docs/4.5/foundations/use-cases,2,1,2,0,2,0,0 +/docs/4.5/getting-started/quickstart,2,1,2,9,2,0,0 +/docs/4.5/reference/blob,2,2,1,0,5,0,0 +/docs/4.5/reference/clustering/certificate-management,2,1,2,3,3,0,0 +/docs/4.5/reference/roles,2,1,2,0,2,0,0 +/docs/4.5/reference/sql-guide/date-functions,2,1,2,16,2,0,0 +/docs/4.5/reference/sql-guide/functions,2,1,2,0,2,0,0 +/docs/4.5/reference/sql-guide/json-search,2,1,2,0,2,0,0 +/docs/4.5/reference/sql-guide/reserved-word,2,1,2,1,2,0,0 +/docs/4.5/reference/sql-guide/sql-geospatial-functions,2,1,2,1,2,0,0 +/docs/4.6/administration/compact,2,2,1,0.5,9,0,0 +/docs/4.6/administration/harper-studio/create-account,2,2,1,0,8,0,0 +/docs/4.6/administration/harper-studio/manage-databases-browse-data,2,2,1,1.5,9,0,0 +/docs/4.6/deployments/harper-cloud,2,2,1,3.5,6,0,0 +/docs/4.6/developers/clustering/naming-a-node,2,1,2,6,2,0,0 +/docs/4.6/developers/miscellaneous/sdks,2,2,1,0,7,0,0 +/docs/4.6/developers/sql-guide/sql-geospatial-functions,2,2,1,0.5,8,0,0 +/docs/4.6/foundations/use-cases,2,1,2,0,2,0,0 +/docs/4.6/reference/sql-guide,2,2,1,0.5,6,0,0 +/docs/4.6/reference/sql-guide/reserved-word,2,1,2,10,2,0,0 
+/docs/4.6/technical-details/reference/architecture,2,2,1,0,7,0,0 +/docs/4.6/technical-details/reference/content-types,2,2,1,5,9,0,0 +/docs/administration/harper-studio/,2,2,1,1,9,0,0 +/docs/administration/harperdb-studio/create-account,2,2,1,0,7,0,0 +/docs/administration/harperdb-studio/login-password-reset,2,3,0.6666666666666666,3.3333333333333335,9,0,0 +/docs/administration/harperdb-studio/manage-clustering,2,2,1,3.5,9,0,0 +/docs/administration/harperdb-studio/manage-functions,2,2,1,1.5,9,0,0 +/docs/administration/harperdb-studio/query-instance-data,2,2,1,0,8,0,0 +/docs/configuration/clustering,2,2,1,1,6,0,0 +/docs/developers/clustering/managing-subscriptions,2,2,1,0.5,6,0,0 +/docs/developers/components/google-data-studio,2,2,1,0,8,0,0 +/docs/developers/replication/clustering/certificate-management,2,2,1,0.5,9,0,0 +/docs/developers/vector-indexes,2,2,1,0,8,0,0 +/docs/extensions/functions,2,1,2,2,5,0,0 +/docs/foundations/,2,2,1,2.5,6,0,0 +/docs/release-notes/4.tucker/4.0.3,2,2,1,1.5,9,0,0 +/docs/robots.txt,2,2,1,1,8,0,0 +/docs/sql-guide/insert,2,2,1,0,7,0,0 +/docs/technical-details/release-notes/4.tucker/1.3.0,2,2,1,4.5,10,0,0 +/docs/technical-details/release-notes/4.tucker/1.alby/1.1.0,2,2,1,0.5,9,0,0 +/docs/technical-details/release-notes/4.tucker/2.2.0,2,2,1,1.5,9,0,0 +/docs/technical-details/release-notes/4.tucker/2.penny,2,2,1,4,9,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.0,2,2,1,1,9,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.1,2,2,1,3,9,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.3,2,2,1,0.5,9,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.4,2,2,1,0,8,0,0 +/docs/technical-details/release-notes/4.tucker/3.monkey/3.1.2,2,2,1,1.5,8,0,0 +/docs/v/4.1/custom-functions/templates,2,2,1,0,7,0,0 +/docs/v/4.1/harperdb-studio/create-account,2,2,1,1,9,0,0 +/docs/v/4.1/release-notes/4.tucker/4.0.1,2,2,1,0,8,0,0 +/docs/v/4.1/sql-guide/update,2,2,1,1,9,0,0 +/docs/v/4.2/technical-details/reference/headers,2,2,1,0,6,0,0 
+/fabric/Y2x1c3Rlci,2,2,1,11.5,9,0,0 +/404.html,1,1,1,7,5,0,0 +/developers/applications,1,1,1,0,4,0,0 +/developers/components/built-in,1,1,1,0,4,0,0 +/docs/4.1/administration,1,1,1,0,4,0,0 +/docs/4.1/administration/administration,1,1,1,3,5,0,0 +/docs/4.1/administration/cloning,1,1,1,3,5,0,0 +/docs/4.1/administration/harperdb-studio,1,1,1,0,4,0,0 +/docs/4.1/administration/harperdb-studio/create-account,1,1,1,0,4,0,0 +/docs/4.1/administration/harperdb-studio/enable-mixed-content,1,1,1,0,3,0,0 +/docs/4.1/administration/harperdb-studio/instances,1,1,1,0,4,0,0 +/docs/4.1/administration/harperdb-studio/login-password-reset,1,1,1,0,4,0,0 +/docs/4.1/administration/harperdb-studio/manage-charts,1,1,1,1,5,0,0 +/docs/4.1/administration/harperdb-studio/manage-instance-users,1,1,1,1,5,0,0 +/docs/4.1/administration/harperdb-studio/manage-schemas-browse-data,1,1,1,0,4,0,0 +/docs/4.1/administration/harperdb-studio/organizations,1,1,1,0,4,0,0 +/docs/4.1/administration/jobs,1,1,1,2,5,0,0 +/docs/4.1/administration/logging,1,1,1,1,5,0,0 +/docs/4.1/administration/logging/audit-logging,1,1,1,0,4,0,0 +/docs/4.1/administration/logging/logging,1,1,1,0,4,0,0 +/docs/4.1/administration/logging/transaction-logging,1,1,1,0,4,0,0 +/docs/4.1/custom-functions/define-routes,1,1,1,0,4,0,0 +/docs/4.1/deployments/harperdb-cli,1,1,1,0,4,0,0 +/docs/4.1/deployments/harperdb-cloud,1,1,1,2,5,0,0 +/docs/4.1/deployments/harperdb-cloud/instance-size-hardware-specs,1,1,1,0,4,0,0 +/docs/4.1/deployments/harperdb-cloud/iops-impact,1,1,1,0,4,0,0 +/docs/4.1/deployments/harperdb-cloud/verizon-5g-wavelength-instances,1,1,1,1,5,0,0 +/docs/4.1/deployments/install-harperdb,1,1,1,0,4,0,0 +/docs/4.1/deployments/install-harperdb/linux,1,1,1,0,4,0,0 +/docs/4.1/deployments/upgrade-hdb-instance,1,1,1,0,4,0,0 +/docs/4.1/developers/applications,1,1,1,1,5,0,0 +/docs/4.1/developers/applications/debugging,1,1,1,1,5,0,0 +/docs/4.1/developers/applications/define-routes,1,1,1,1,5,0,0 
+/docs/4.1/developers/applications/defining-schemas,1,1,1,2,5,0,0 +/docs/4.1/developers/clustering/certificate-management,1,1,1,1,5,0,0 +/docs/4.1/developers/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/4.1/developers/clustering/managing-subscriptions,1,1,1,1,5,0,0 +/docs/4.1/developers/clustering/naming-a-node,1,1,1,0,4,0,0 +/docs/4.1/developers/clustering/requirements-and-definitions,1,1,1,0,4,0,0 +/docs/4.1/developers/clustering/subscription-overview,1,1,1,0,4,0,0 +/docs/4.1/developers/clustering/things-worth-knowing,1,1,1,1,5,0,0 +/docs/4.1/developers/components,1,1,1,0,4,0,0 +/docs/4.1/developers/components/drivers,1,1,1,0,4,0,0 +/docs/4.1/developers/components/installing,1,2,0.5,1,6,0,0 +/docs/4.1/developers/components/writing-extensions,1,1,1,1,5,0,0 +/docs/4.1/developers/operations-api/clustering,1,1,1,0,4,0,0 +/docs/4.1/developers/operations-api/jobs,1,1,1,0,4,0,0 +/docs/4.1/developers/operations-api/logs,1,1,1,1,5,0,0 +/docs/4.1/developers/operations-api/registration,1,1,1,1,5,0,0 +/docs/4.1/developers/operations-api/sql-operations,1,1,1,0,4,0,0 +/docs/4.1/developers/operations-api/token-authentication,1,1,1,0,4,0,0 +/docs/4.1/developers/operations-api/users-and-roles,1,1,1,2,5,0,0 +/docs/4.1/developers/operations-api/utilities,1,1,1,2,5,0,0 +/docs/4.1/developers/replication,1,1,1,0,4,0,0 +/docs/4.1/developers/replication/clustering/certificate-management,1,1,1,0,4,0,0 +/docs/4.1/developers/replication/clustering/creating-a-cluster-user,1,1,1,0,4,0,0 +/docs/4.1/developers/replication/clustering/managing-subscriptions,1,1,1,1,5,0,0 +/docs/4.1/developers/replication/clustering/naming-a-node,1,1,1,0,4,0,0 +/docs/4.1/developers/security/basic-auth,1,1,1,2,5,0,0 +/docs/4.1/developers/security/certificate-management,1,1,1,0,3,0,0 +/docs/4.1/developers/security/configuration,1,1,1,2,5,0,0 +/docs/4.1/developers/sql-guide,1,1,1,0,4,0,0 +/docs/4.1/developers/sql-guide/date-functions,1,1,1,0,4,0,0 +/docs/4.1/developers/sql-guide/features-matrix,1,1,1,1,5,0,0 
+/docs/4.1/developers/sql-guide/functions,1,1,1,0,4,0,0 +/docs/4.1/developers/sql-guide/sql-geospatial-functions,1,1,1,0,4,0,0 +/docs/4.1/harperdb-studio/create-account,1,1,1,0,3,0,0 +/docs/4.1/install-harperdb/,1,1,1,6,5,0,0 +/docs/4.1/operations-api,1,1,1,1,5,0,0 +/docs/4.1/reference/limits,1,1,1,0,4,0,0 +/docs/4.1/release-notes/1.alby,1,1,1,0,4,0,0 +/docs/4.1/release-notes/1.alby/1.2.0,1,1,1,0,4,0,0 +/docs/4.1/release-notes/1.alby/1.3.0,1,2,0.5,0,5,0,0 +/docs/4.1/release-notes/1.alby/1.3.1,1,1,1,0,3,0,0 +/docs/4.1/release-notes/2.penny/2.1.1,1,1,1,0,4,0,0 +/docs/4.1/release-notes/2.penny/2.2.0,1,1,1,0,4,0,0 +/docs/4.1/release-notes/2.penny/2.2.2,1,1,1,0,4,0,0 +/docs/4.1/release-notes/2.penny/2.2.3,1,2,0.5,0,5,0,0 +/docs/4.1/release-notes/2.penny/2.3.1,1,1,1,0,4,0,0 +/docs/4.1/release-notes/3.monkey,1,1,1,0,4,0,0 +/docs/4.1/release-notes/3.monkey/3.0.0,1,1,1,0,4,0,0 +/docs/4.1/release-notes/3.monkey/3.1.3,1,1,1,8,5,0,0 +/docs/4.1/release-notes/3.monkey/3.1.4,1,1,1,0,4,0,0 +/docs/4.1/release-notes/3.monkey/3.1.5,1,1,1,0,4,0,0 +/docs/4.1/release-notes/3.monkey/3.3.0,1,1,1,0,4,0,0 +/docs/4.1/release-notes/4.tucker,1,1,1,0,4,0,0 +/docs/4.1/release-notes/4.tucker/4.0.0,1,1,1,0,4,0,0 +/docs/4.1/release-notes/4.tucker/4.0.4,1,1,1,0,4,0,0 +/docs/4.1/release-notes/4.tucker/4.1.0,1,1,1,0,4,0,0 +/docs/4.1/rest,1,1,1,1,5,0,0 +/docs/4.1/security,1,1,1,5,5,0,0 +/docs/4.1/sql-guide/delete,1,1,1,0,4,0,0 +/docs/4.1/sql-guide/sql-geospatial-functions/geocrosses,1,1,1,0,3,0,0 +/docs/4.1/technical-details/reference/analytics,1,1,1,1,5,0,0 +/docs/4.1/technical-details/reference/architecture,1,1,1,0,4,0,0 +/docs/4.1/technical-details/reference/graphql,1,1,1,0,4,0,0 +/docs/4.1/technical-details/reference/resource,1,1,1,1,5,0,0 +/docs/4.1/technical-details/reference/storage-algorithm,1,1,1,1,5,0,0 +/docs/4.1/technical-details/reference/transactions,1,1,1,0,4,0,0 +/docs/4.1/technical-details/release-notes/4.tucker/1.2.0,1,1,1,2,5,0,0 
+/docs/4.1/technical-details/release-notes/4.tucker/2.1.1,1,1,1,0,4,0,0 +/docs/4.1/technical-details/release-notes/4.tucker/2.penny/2.2.2,1,1,1,0,4,0,0 +/docs/4.1/technical-details/release-notes/4.tucker/3.1.2,1,1,1,8,5,0,0 +/docs/4.1/technical-details/release-notes/4.tucker/3.2.1,1,1,1,1,5,0,0 +/docs/4.2/4.2/developers/clustering/,1,1,1,1,5,0,0 +/docs/4.2/4.2/getting-started,1,1,1,4,5,0,0 +/docs/4.2/administration/harperdb-studio/create-account,1,1,1,2,5,0,0 +/docs/4.2/administration/harperdb-studio/query-instance-data,1,1,1,0,3,0,0 +/docs/4.2/administration/logging/logging,1,1,1,0,4,0,0 +/docs/4.2/clustering/creating-a-cluster-user,1,1,1,0,4,0,0 +/docs/4.2/clustering/things-worth-knowing,1,1,1,1,5,0,0 +/docs/4.2/developers/,1,1,1,1,5,0,0 +/docs/4.2/developers/clustering/enabling-clustering,1,1,1,0,3,0,0 +/docs/4.2/developers/clustering/managing-subscriptions,1,1,1,0,4,0,0 +/docs/4.2/developers/operations-api/token-authentication,1,1,1,3,5,0,0 +/docs/4.2/developers/replication/clustering/certificate-management,1,1,1,9,5,0,0 +/docs/4.2/harperdb-studio,1,1,1,0,4,0,0 +/docs/4.2/harperdb-studio/enable-mixed-content,1,1,1,7,5,0,0 +/docs/4.2/harperdb-studio/manage-functions,1,1,1,8,5,0,0 +/docs/4.2/harperdb-studio/manage-schemas-browse-data,1,1,1,0,4,0,0 +/docs/4.2/install-harperdb,1,1,1,6,5,0,0 +/docs/4.2/operations-api,1,1,1,0,4,0,0 +/docs/4.2/reference/clustering/naming-a-node,1,1,1,6,5,0,0 +/docs/4.2/reference/clustering/subscription-overview,1,1,1,0,3,0,0 +/docs/4.2/reference/clustering/things-worth-knowing,1,1,1,0,4,0,0 +/docs/4.2/reference/content-types,1,1,1,0,4,0,0 +/docs/4.2/reference/data-types,1,1,1,0,3,0,0 +/docs/4.2/reference/globals,1,1,1,1,4,0,0 +/docs/4.2/reference/sql-guide/date-functions,1,1,1,0,3,0,0 +/docs/4.2/reference/transactions,1,1,1,2,5,0,0 +/docs/4.2/release-notes/2.penny/2.2.3,1,1,1,2,5,0,0 +/docs/4.2/release-notes/2.penny/2.3.1,1,1,1,0,4,0,0 +/docs/4.2/release-notes/3.monkey,1,1,1,4,5,0,0 
+/docs/4.2/release-notes/3.monkey/3.3.0,1,1,1,1,5,0,0 +/docs/4.2/release-notes/4.tucker/4.0.5,1,1,1,0,4,0,0 +/docs/4.2/rest,1,1,1,1,5,0,0 +/docs/4.2/security,1,1,1,1,5,0,0 +/docs/4.2/security/configuration,1,1,1,1,5,0,0 +/docs/4.2/sql-guide/delete,1,1,1,0,4,0,0 +/docs/4.2/sql-guide/insert,1,1,1,0,4,0,0 +/docs/4.2/sql-guide/sql-geospatial-functions/geoconvert,1,1,1,0,4,0,0 +/docs/4.2/technical-details/,1,1,1,0,4,0,0 +/docs/4.2/technical-details/reference/globals,1,1,1,0,4,0,0 +/docs/4.2/technical-details/reference/graphql,1,1,1,3,5,0,0 +/docs/4.2/technical-details/reference/resource,1,1,1,0,4,0,0 +/docs/4.2/technical-details/release-notes/4.tucker/1.2.0,1,1,1,2,5,0,0 +/docs/4.2/technical-details/release-notes/4.tucker/1.3.0,1,1,1,0,4,0,0 +/docs/4.2/technical-details/release-notes/4.tucker/1.3.1,1,1,1,0,4,0,0 +/docs/4.3/4.3/developers/rest,1,1,1,2,5,0,0 +/docs/4.3/administration/harperdb-studio/instance-example-code,1,1,1,0,4,0,0 +/docs/4.3/administration/harperdb-studio/manage-clustering,1,1,1,2,5,0,0 +/docs/4.3/administration/harperdb-studio/manage-functions,1,1,1,0,4,0,0 +/docs/4.3/administration/harperdb-studio/manage-schemas-browse-data,1,1,1,0,4,0,0 +/docs/4.3/clustering/creating-a-cluster-user,1,1,1,2,5,0,0 +/docs/4.3/deployments/,1,1,1,0,4,0,0 +/docs/4.3/developers/applications/,1,1,1,0,3,0,0 +/docs/4.3/developers/components/drivers,1,1,1,1,5,0,0 +/docs/4.3/developers/replication/clustering/certificate-management,1,1,1,8,5,0,0 +/docs/4.3/developers/replication/clustering/establishing-routes,1,1,1,0,4,0,0 +/docs/4.3/developers/security/mtls-auth,1,1,1,0,4,0,0 +/docs/4.3/developers/sql-guide/date-functions,1,1,1,0,4,0,0 +/docs/4.3/harperdb-studio,1,1,1,0,4,0,0 +/docs/4.3/harperdb-studio/enable-mixed-content,1,1,1,0,4,0,0 +/docs/4.3/harperdb-studio/login-password-reset,1,1,1,0,4,0,0 +/docs/4.3/harperdb-studio/manage-schemas-browse-data,1,1,1,0,4,0,0 +/docs/4.3/install-harperdb,1,1,1,0,4,0,0 +/docs/4.3/operations-api,1,1,1,0,4,0,0 
+/docs/4.3/reference/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/4.3/reference/content-types,1,1,1,4,4,0,0 +/docs/4.3/reference/limits,1,1,1,3,5,0,0 +/docs/4.3/reference/sql-guide/sql-geospatial-functions,1,1,1,0,3,0,0 +/docs/4.3/release-notes/2.penny/2.2.3,1,1,1,2,5,0,0 +/docs/4.3/release-notes/2.penny/2.3.1,1,1,1,2,5,0,0 +/docs/4.3/release-notes/3.monkey/3.3.0,1,1,1,0,4,0,0 +/docs/4.3/release-notes/4.tucker/4.0.5,1,1,1,10,5,0,0 +/docs/4.3/rest,1,1,1,0,4,0,0 +/docs/4.3/security,1,1,1,0,4,0,0 +/docs/4.3/sql-guide/delete,1,1,1,0,4,0,0 +/docs/4.3/sql-guide/insert,1,1,1,4,5,0,0 +/docs/4.3/sql-guide/select,1,1,1,1,5,0,0 +/docs/4.3/sql-guide/sql-geospatial-functions/geoequal,1,1,1,2,5,0,0 +/docs/4.3/technical-details/,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/1.alby,1,1,1,0,3,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.2.0,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.2.2,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.3.0,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.3.1,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.penny,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/2.penny/2.2.2,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.1.1,1,1,1,1,5,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.1.2,1,1,1,1,5,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.1.3,1,1,1,3,5,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.1.5,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.2.0,1,1,1,0,3,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.3.0,1,1,1,0,4,0,0 +/docs/4.3/technical-details/release-notes/4.tucker/3.monkey,1,1,1,0,4,0,0 +/docs/4.4./getting-started/,1,1,1,2,3,0,0 +/docs/4.4/administration/administration,1,1,1,3,5,0,0 +/docs/4.4/administration/harper-studio/create-account,1,1,1,0,5,0,0 
+/docs/4.4/administration/harper-studio/instance-metrics,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio,1,1,1,2,5,0,0 +/docs/4.4/administration/harperdb-studio/create-account,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/enable-mixed-content,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/instance-configuration,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/instance-metrics,1,1,1,2,5,0,0 +/docs/4.4/administration/harperdb-studio/manage-applications,1,1,1,1,5,0,0 +/docs/4.4/administration/harperdb-studio/manage-databases-browse-data,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/manage-instance-roles,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/manage-instance-users,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/manage-replication,1,1,1,0,4,0,0 +/docs/4.4/administration/harperdb-studio/organizations,1,1,1,0,4,0,0 +/docs/4.4/administration/jobs,1,1,1,0,3,0,0 +/docs/4.4/administration/logging/logging,1,1,1,0,4,0,0 +/docs/4.4/deployments/harper-cloud/alarms,1,1,1,0,4,0,0 +/docs/4.4/deployments/harperdb-cloud/alarms,1,1,1,1,5,0,0 +/docs/4.4/deployments/harperdb-cloud/instance-size-hardware-specs,1,1,1,2,5,0,0 +/docs/4.4/deployments/harperdb-cloud/iops-impact,1,1,1,0,4,0,0 +/docs/4.4/deployments/harperdb-cloud/verizon-5g-wavelength-instances,1,1,1,4,5,0,0 +/docs/4.4/deployments/install-harperdb,1,1,1,2,5,0,0 +/docs/4.4/deployments/install-harperdb/linux,1,1,1,7,5,0,0 +/docs/4.4/developers/applications/example-projects,1,1,1,0,4,0,0 +/docs/4.4/developers/clustering/managing-subscriptions,1,1,1,0,3,0,0 +/docs/4.4/developers/components/installing,1,1,1,2,5,0,0 +/docs/4.4/developers/components/sdks,1,1,1,0,4,0,0 +/docs/4.4/developers/components/writing-extensions,1,1,1,0,4,0,0 +/docs/4.4/developers/miscellaneous/google-data-studio,1,1,1,0,4,0,0 +/docs/4.4/developers/miscellaneous/query-optimization,1,1,1,0,4,0,0 +/docs/4.4/developers/operations-api,1,1,1,2,5,0,0 
+/docs/4.4/developers/operations-api/bulk-operations,1,1,1,0,4,0,0 +/docs/4.4/developers/operations-api/clustering/clustering-nats,1,1,1,2,5,0,0 +/docs/4.4/developers/operations-api/components,1,1,1,1,4,0,0 +/docs/4.4/developers/operations-api/custom-functions,1,1,1,1,4,0,0 +/docs/4.4/developers/operations-api/registration,1,1,1,0,3,0,0 +/docs/4.4/developers/operations-api/sql-operations,1,1,1,0,4,0,0 +/docs/4.4/developers/operations-api/token-authentication,1,1,1,3,5,0,0 +/docs/4.4/developers/replication,1,1,1,3,4,0,0 +/docs/4.4/developers/replication/clustering,1,1,1,1,5,0,0 +/docs/4.4/developers/replication/clustering/creating-a-cluster-user,1,1,1,2,5,0,0 +/docs/4.4/developers/replication/clustering/enabling-clustering,1,1,1,2,5,0,0 +/docs/4.4/developers/replication/clustering/establishing-routes,1,1,1,0,4,0,0 +/docs/4.4/developers/replication/clustering/managing-subscriptions,1,1,1,0,4,0,0 +/docs/4.4/developers/replication/clustering/naming-a-node,1,1,1,1,5,0,0 +/docs/4.4/developers/replication/clustering/requirements-and-definitions,1,1,1,0,4,0,0 +/docs/4.4/developers/replication/clustering/subscription-overview,1,1,1,1,5,0,0 +/docs/4.4/developers/replication/clustering/things-worth-knowing,1,1,1,2,5,0,0 +/docs/4.4/developers/rest,1,1,1,0,4,0,0 +/docs/4.4/developers/security/configuration,1,1,1,0,4,0,0 +/docs/4.4/developers/sql-guide,1,1,1,2,5,0,0 +/docs/4.4/developers/sql-guide/date-functions,1,1,1,2,5,0,0 +/docs/4.4/developers/sql-guide/features-matrix,1,1,1,0,3,0,0 +/docs/4.4/developers/sql-guide/functions,1,1,1,0,4,0,0 +/docs/4.4/developers/sql-guide/sql-geospatial-functions,1,1,1,0,4,0,0 +/docs/4.4/harperdb-studio/manage-schemas-browse-data,1,1,1,2,5,0,0 +/docs/4.4/reference/clustering,1,1,1,2,5,0,0 +/docs/4.4/reference/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/4.4/reference/clustering/naming-a-node,1,1,1,0,4,0,0 +/docs/4.4/reference/clustering/requirements-and-definitions,1,1,1,0,4,0,0 
+/docs/4.4/reference/clustering/things-worth-knowing,1,1,1,0,3,0,0 +/docs/4.4/reference/content-types,1,1,1,0,4,0,0 +/docs/4.4/reference/dynamic-schema,1,1,1,0,3,0,0 +/docs/4.4/reference/sql-guide,1,1,1,0,4,0,0 +/docs/4.4/reference/sql-guide/features-matrix,1,1,1,0,4,0,0 +/docs/4.4/reference/sql-guide/sql-geospatial-functions,1,1,1,0,3,0,0 +/docs/4.4/reference/storage-algorithm,1,2,0.5,6.5,5,0,0 +/docs/4.4/technical-details/,1,1,1,1,5,0,0 +/docs/4.4/technical-details/reference/content-types,1,1,1,1,5,0,0 +/docs/4.4/technical-details/reference/data-types,1,1,1,1,5,0,0 +/docs/4.4/technical-details/reference/dynamic-schema,1,1,1,2,5,0,0 +/docs/4.4/technical-details/reference/globals,1,1,1,1,5,0,0 +/docs/4.4/technical-details/reference/headers,1,1,1,4,5,0,0 +/docs/4.4/technical-details/reference/limits,1,1,1,0,4,0,0 +/docs/4.4/technical-details/reference/resource,1,1,1,1,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/1.2.0,1,1,1,0,3,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/1.3.0,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/1.alby,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/2.2.0,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/2.2.2,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/2.3.1,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/2.penny,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.0.0,1,1,1,2,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.1.0,1,1,1,5,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.1.1,1,1,1,1,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.1.3,1,1,1,2,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.1.4,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.1.5,1,1,1,1,5,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.3.0,1,1,1,0,4,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/4.4.26,1,1,1,1,5,0,0 +/docs/4.5/,1,1,1,1,5,0,0 
+/docs/4.5/administration/administration,1,1,1,0,4,0,0 +/docs/4.5/administration/harper-studio/enable-mixed-content,1,1,1,0,4,0,0 +/docs/4.5/administration/harper-studio/instance-metrics,1,1,1,0,4,0,0 +/docs/4.5/administration/harper-studio/manage-instance-users,1,1,1,1,5,0,0 +/docs/4.5/administration/harper-studio/organizations,1,1,1,6,5,0,0 +/docs/4.5/administration/harper-studio/query-instance-data,1,1,1,0,4,0,0 +/docs/4.5/administration/harperdb-studio,1,1,1,1,5,0,0 +/docs/4.5/administration/harperdb-studio/manage-charts,1,1,1,1,5,0,0 +/docs/4.5/administration/harperdb-studio/manage-clustering,1,1,1,1,5,0,0 +/docs/4.5/administration/jobs,1,1,1,0,3,0,0 +/docs/4.5/deployments/harper-cloud/alarms,1,1,1,5,5,0,0 +/docs/4.5/developers/applications/example-projects,1,1,1,0,4,0,0 +/docs/4.5/developers/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/4.5/developers/clustering/establishing-routes,1,1,1,0,4,0,0 +/docs/4.5/developers/miscellaneous,1,1,1,2,5,0,0 +/docs/4.5/developers/miscellaneous/sdks,1,1,1,1,5,0,0 +/docs/4.5/developers/replication/clustering/certificate-management,1,1,1,0,4,0,0 +/docs/4.5/developers/replication/clustering/enabling-clustering,1,1,1,1,5,0,0 +/docs/4.5/developers/replication/clustering/naming-a-node,1,1,1,0,4,0,0 +/docs/4.5/developers/replication/clustering/subscription-overview,1,1,1,0,4,0,0 +/docs/4.5/developers/security/basic-auth,1,1,1,2,5,0,0 +/docs/4.5/developers/security/mtls-auth,1,1,1,7,5,0,0 +/docs/4.5/developers/sql-guide/functions,1,1,1,0,4,0,0 +/docs/4.5/developers/sql-guide/reserved-word,1,1,1,0,4,0,0 +/docs/4.5/getting-started/first-harper-app,1,1,1,0,4,0,0 +/docs/4.5/getting-started/harper-concepts,1,1,1,0,4,0,0 +/docs/4.5/getting-started/install-harper,1,1,1,0,4,0,0 +/docs/4.5/reference/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/4.5/reference/clustering/naming-a-node,1,1,1,0,4,0,0 +/docs/4.5/reference/clustering/requirements-and-definitions,1,1,1,0,4,0,0 
+/docs/4.5/reference/clustering/things-worth-knowing,1,1,1,0,4,0,0 +/docs/4.5/reference/sql-guide/features-matrix,1,1,1,0,1,0,0 +/docs/4.5/technical-details/reference/blob,1,1,1,0,4,0,0 +/docs/4.5/technical-details/reference/data-types,1,1,1,0,4,0,0 +/docs/4.5/technical-details/reference/graphql,1,1,1,0,4,0,0 +/docs/4.5/technical-details/reference/headers,1,1,1,5,5,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/1.alby,1,1,1,2,5,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/2.1.1,1,1,1,0,4,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/2.2.2,1,1,1,1,5,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/2.penny,1,1,1,1,5,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/3.1.5,1,1,1,0,4,0,0 +/docs/4.5/technical-details/release-notes/4.tucker/3.3.0,1,1,1,3,5,0,0 +/docs/4.6/administration/harper-studio/instance-metrics,1,1,1,0,4,0,0 +/docs/4.6/administration/harper-studio/manage-instance-roles,1,1,1,0,4,0,0 +/docs/4.6/deployments/harper-cloud/verizon-5g-wavelength-instances,1,1,1,0,4,0,0 +/docs/4.6/developers/,1,1,1,2,5,0,0 +/docs/4.6/developers/clustering/managing-subscriptions,1,1,1,0,3,0,0 +/docs/4.6/developers/miscellaneous,1,1,1,0,3,0,0 +/docs/4.6/developers/replication/clustering/certificate-management,1,1,1,0,4,0,0 +/docs/4.6/developers/replication/clustering/managing-subscriptions,1,1,1,0,4,0,0 +/docs/4.6/developers/sql-guide/functions,1,1,1,8,5,0,0 +/docs/4.6/getting-started/harper-concepts,1,1,1,0,4,0,0 +/docs/4.6/reference/sql-guide/date-functions,1,1,1,0,4,0,0 +/docs/4.6/reference/sql-guide/features-matrix,1,1,1,0,4,0,0 +/docs/4.6/reference/sql-guide/sql-geospatial-functions,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/analytics,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/components,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/components/applications,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/components/built-in-extensions,1,1,1,0,4,0,0 
+/docs/4.6/technical-details/reference/components/configuration,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/components/plugins,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/dynamic-schema,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/resource,1,1,1,0,4,0,0 +/docs/4.6/technical-details/reference/resources,1,1,1,2,5,0,0 +/docs/4.6/technical-details/reference/resources/migration,1,1,1,2,5,0,0 +/docs/4.6/technical-details/reference/transactions,1,1,1,0,4,0,0 +/docs/5.0/migration-guide,1,1,1,1,5,0,0 +/docs/add-ons-and-sdks/google-data-studio,1,1,1,0,4,0,0 +/docs/administration/edge,1,1,1,2,5,0,0 +"/docs/administration/harperdb-studio""",1,1,1,0,3,0,0 +/docs/administration/harperdb-studio/create-an-account,1,1,1,1,5,0,0 +/docs/administration/harperdb-studio/enable-mixed-content,1,1,1,1,5,0,0 +/docs/administration/harperdb-studio/instance-configuration,1,1,1,0,4,0,0 +/docs/administration/harperdb-studio/instance-example-code,1,1,1,0,4,0,0 +/docs/administration/harperdb-studio/manage-charts,1,1,1,1,5,0,0 +/docs/administration/harperdb-studio/manage-databases-browse-data,1,1,1,0,4,0,0 +/docs/administration/harperdb-studio/manage-instance-users,1,1,1,2,5,0,0 +/docs/administration/harperdb-studio/manage-replication,1,1,1,2,5,0,0 +/docs/administration/harperdb-studio/manage-schemas-browse-data,1,1,1,1,5,0,0 +/docs/api/ops-api,1,1,1,0,2,0,0 +/docs/cli,1,1,1,0,2,0,0 +/docs/cluster-setup,1,1,1,6,5,0,0 +/docs/clustering,1,1,1,0,4,0,0 +/docs/clustering/creating-a-cluster-user,1,1,1,0,4,0,0 +/docs/clustering/things-worth-knowing,1,1,1,1,5,0,0 +/docs/custom-functions/host-static,1,1,1,1,5,0,0 +/docs/custom-functions/restarting-server,1,1,1,1,5,0,0 +/docs/data-loading,1,1,1,5,3,0,0 +/docs/deployments/install-harper/harper-cli,1,1,1,3,3,0,0 +/docs/developers/applications/example-projects,1,1,1,0,4,0,0 +/docs/developers/components/,1,1,1,0,4,0,0 +/docs/developers/components/drivers,1,1,1,0,4,0,0 +/docs/developers/components/operations,1,1,1,0,4,0,0 
+/docs/developers/miscellaneous/google-data-studio,1,1,1,0,4,0,0 +/docs/developers/operations-api/clustering/clustering-nats,1,1,1,0,4,0,0 +/docs/developers/plugin-best-practices,1,1,1,8,5,0,0 +/docs/developers/plugins,1,1,1,4,5,0,0 +/docs/developers/pub-sub,1,1,1,1,5,0,0 +/docs/developers/replication/clustering/establishing-routes,1,1,1,0,4,0,0 +/docs/developers/replication/clustering/naming-a-node,1,1,1,1,5,0,0 +/docs/developers/replication/clustering/requirements-and-definitions,1,1,1,0,4,0,0 +/docs/developers/request-lifecycle,1,1,1,5,5,0,0 +/docs/developers/testing,1,1,1,3,5,0,0 +/docs/graphql/overview,1,1,1,0,2,0,0 +/docs/index-5,1,1,1,0,4,0,0 +/docs/next/getting-started/installation,1,1,1,0,4,0,0 +/docs/next/reference/components/applications,1,1,1,2,5,0,0 +/docs/performance,1,1,1,0,2,0,0 +/docs/performance-guide,1,1,1,0,2,0,0 +/docs/performance-guide/data-modeling,1,1,1,2,3,0,0 +/docs/reference/,1,1,1,0,4,0,0 +/docs/reference/Applications/defining-roles,1,1,1,1,5,0,0 +/docs/reference/api/roles/add-role,1,1,1,0,3,0,0 +/docs/reference/command-line-interface,1,1,1,0,3,0,0 +/docs/reference/configuration-file,1,1,1,0,3,0,0 +/docs/reference/rest,1,1,1,0,3,0,0 +/docs/reference/security/roles-and-permissions,1,1,1,0,3,0,0 +/docs/release-notes/1.alby,1,1,1,0,4,0,0 +/docs/release-notes/1.alby/1.2.0,1,1,1,0,4,0,0 +/docs/release-notes/1.alby/1.3.0,1,1,1,0,4,0,0 +/docs/release-notes/2.penny/2.2.0,1,1,1,1,5,0,0 +/docs/release-notes/2.penny/2.3.1,1,1,1,1,5,0,0 +/docs/release-notes/3.monkey,1,1,1,0,4,0,0 +/docs/release-notes/4.tucker/4.0.5,1,1,1,1,5,0,0 +/docs/replication,1,1,1,0,3,0,0 +/docs/resources,1,1,1,0,2,0,0 +/docs/rest-api,1,1,1,0,3,0,0 +/docs/sql-guide/datatypes,1,1,1,0,3,0,0 +/docs/sql-guide/select,1,1,1,0,3,0,0 +/docs/sql-guide/sql-geospatial-functions/geoconvert,1,1,1,10,5,0,0 +/docs/support,1,1,1,0,4,0,0 +/docs/technical-details/,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/1.1.0,1,1,1,0,4,0,0 
+/docs/technical-details/release-notes/4.tucker/1.2.0,1,1,1,0,3,0,0 +/docs/technical-details/release-notes/4.tucker/1.3.1,1,1,1,7,5,0,0 +/docs/technical-details/release-notes/4.tucker/1.alby,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/1.alby/1.2.0,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/1.alby/1.3.0,1,1,1,3,5,0,0 +/docs/technical-details/release-notes/4.tucker/1.alby/1.3.1,1,1,1,1,5,0,0 +/docs/technical-details/release-notes/4.tucker/2.1.1,1,1,1,1,5,0,0 +/docs/technical-details/release-notes/4.tucker/2.2.3,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/2.3.1,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/2.penny/2.2.2,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/2.penny/2.2.3,1,1,1,1,5,0,0 +/docs/technical-details/release-notes/4.tucker/2.penny/2.3.0,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/2.penny/2.3.1,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/3.0.0,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.2,1,1,1,0,4,0,0 +/docs/technical-details/release-notes/4.tucker/3.1.5,1,1,1,3,5,0,0 +/docs/technical-details/release-notes/4.tucker/3.2.0,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/3.2.1,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/3.monkey,1,1,1,2,5,0,0 +/docs/technical-details/release-notes/4.tucker/3.monkey/3.1.3,1,1,1,0,4,0,0 +/docs/v/4.1/audit-logging,1,1,1,0,4,0,0 +/docs/v/4.1/clustering,1,1,1,0,3,0,0 +/docs/v/4.1/clustering/enabling-clustering,1,1,1,0,4,0,0 +/docs/v/4.1/clustering/managing-subscriptions,1,1,1,0,4,0,0 +/docs/v/4.1/clustering/naming-a-node,1,1,1,4,5,0,0 +/docs/v/4.1/clustering/requirements-and-definitions,1,1,1,0,4,0,0 +/docs/v/4.1/configuration,1,1,1,2,5,0,0 +/docs/v/4.1/custom-functions/create-project,1,1,1,2,5,0,0 +/docs/v/4.1/custom-functions/custom-functions-operations,1,1,1,7,5,0,0 +/docs/v/4.1/custom-functions/define-helpers,1,1,1,0,4,0,0 
+/docs/v/4.1/custom-functions/example-projects,1,1,1,0,4,0,0 +/docs/v/4.1/custom-functions/host-static,1,1,1,0,4,0,0 +/docs/v/4.1/custom-functions/requirements-definitions,1,1,1,1,5,0,0 +/docs/v/4.1/custom-functions/using-npm-git,1,1,1,0,4,0,0 +/docs/v/4.1/harperdb-4.2-pre-release/release-notes/2.penny,1,1,1,5,3,0,0 +/docs/v/4.1/harperdb-cloud,1,1,1,2,5,0,0 +/docs/v/4.1/harperdb-cloud/alarms,1,1,1,0,4,0,0 +/docs/v/4.1/harperdb-studio/enable-mixed-content,1,1,1,2,5,0,0 +/docs/v/4.1/harperdb-studio/instance-example-code,1,1,1,0,4,0,0 +/docs/v/4.1/harperdb-studio/manage-schemas-browse-data,1,1,1,0,2,0,0 +/docs/v/4.1/harperdb-studio/organizations,1,1,1,0,4,0,0 +/docs/v/4.1/install-harperdb/linux,1,1,1,0,4,0,0 +/docs/v/4.1/jobs,1,1,1,6,5,0,0 +/docs/v/4.1/logging,1,1,1,0,4,0,0 +/docs/v/4.1/reference/content-types,1,1,1,0,4,0,0 +/docs/v/4.1/reference/storage-algorithm,1,1,1,3,5,0,0 +/docs/v/4.1/release-notes/1.alby/1.1.0,1,1,1,5,5,0,0 +/docs/v/4.1/release-notes/1.alby/1.3.0,1,1,1,2,5,0,0 +/docs/v/4.1/release-notes/3.monkey/3.0.0,1,1,1,0,4,0,0 +/docs/v/4.1/release-notes/3.monkey/3.1.0,1,1,1,1,5,0,0 +/docs/v/4.1/release-notes/3.monkey/3.1.1,1,1,1,1,5,0,0 +/docs/v/4.1/release-notes/3.monkey/3.1.4,1,1,1,1,5,0,0 +/docs/v/4.1/release-notes/4.tucker/4.0.0,1,1,1,2,5,0,0 +/docs/v/4.1/release-notes/4.tucker/4.0.2,1,1,1,0,4,0,0 +/docs/v/4.1/release-notes/4.tucker/4.1.0,1,1,1,2,5,0,0 +/docs/v/4.1/security/users-and-roles,1,1,1,2,5,0,0 +/docs/v/4.1/sql-guide/insert,1,1,1,3,5,0,0 +/docs/v/4.1/sql-guide/select,1,1,1,0,4,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions,1,1,1,0,4,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions/geoarea,1,1,1,2,5,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions/geodifference,1,1,1,1,5,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions/geolength,1,1,1,0,4,0,0 +/docs/v/4.1/sql-guide/sql-geospatial-functions/geonear,1,1,1,0,4,0,0 +/docs/v/4.1/support,1,1,1,2,5,0,0 +/docs/v/4.1/technical-details/release-notes/4.tucker/2.2.3,1,1,1,0,4,0,0 
+/docs/v/4.1/transaction-logging,1,1,1,0,4,0,0 +/docs/v/4.2,1,1,1,0,3,0,0 +/docs/v/4.2/administration/harperdb-studio/create-account,1,1,1,0,4,0,0 +/docs/v/4.2/administration/harperdb-studio/manage-charts,1,1,1,0,4,0,0 +/docs/v/4.2/administration/harperdb-studio/manage-functions,1,1,1,18,5,0,0 +/docs/v/4.2/administration/logging/transaction-logging,1,1,1,0,4,0,0 +/docs/v/4.2/developers/clustering/certificate-management,1,1,1,0,4,0,0 +/docs/v/4.2/developers/clustering/creating-a-cluster-user,1,1,1,0,4,0,0 +/docs/v/4.2/developers/clustering/enabling-clustering,1,1,1,3,5,0,0 +/docs/v/4.2/developers/clustering/things-worth-knowing,1,1,1,0,4,0,0 +/docs/v/4.2/developers/components/drivers,1,1,1,0,3,0,0 +/docs/v/4.2/developers/operations-api/users-and-roles,1,1,1,0,4,0,0 +/docs/v/4.2/developers/sql-guide/date-functions,1,1,1,3,5,0,0 +/docs/v/4.2/developers/sql-guide/features-matrix,1,1,1,0,4,0,0 +/docs/v/4.2/developers/sql-guide/reserved-word,1,1,1,3,5,0,0 +/docs/v/4.2/getting-started,1,1,1,0,4,0,0 +/docs/v/4.2/release-notes/2.penny/2.1.1,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/reference/analytics,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/reference/transactions,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/1.alby/1.3.0,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/2.penny/2.2.3,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/3.monkey/3.1.5,1,1,1,1,5,0,0 +/docs/v/4.2/technical-details/release-notes/4.tucker/1.3.1,1,1,1,2,5,0,0 +/docs/v/4.2/technical-details/release-notes/4.tucker/3.1.4,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/4.tucker/4.0.2,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/4.tucker/4.2.4,1,1,1,0,4,0,0 +/docs/v/4.2/technical-details/release-notes/4.tucker/4.3.17,1,1,1,0,3,0,0 +/docs/v/4.4,1,1,1,1,5,0,0 +/docs/v/4.4/developers/clustering/enabling-clustering,1,1,1,0,3,0,0 +/docs/v/4.4/technical-details/release-notes/4.tucker/4.0.7,1,1,1,3,3,0,0 +/fabric/,1,1,1,8,3,0,0 
+/fabric/functions,1,1,1,0,2,0,0 +/harperdb-4.2-pre-release/getting-started,1,1,1,2,5,0,0 +/harperdb-4.3-pre-release/developers/rest,1,1,1,2,4,0,0 +/learnjira,1,1,1,1,4,0,0 +/technical-details/reference/globals,1,1,1,0,4,0,0 +/view/1907183/RWM9vquZ/,1,1,1,3,5,0,0 +/docs/4.1/developers/security,0,1,0,12,1,0,0 +/docs/4.1/developers/security/jwt-auth,0,1,0,10,1,0,0 +/docs/4.4/administration/harperdb-studio/manage-charts,0,0,0,0,1,0,0 +/docs/4.4/administration/logging/audit-logging,0,1,0,0,1,0,0 +/docs/4.4/developers/sql-guide/json-search,0,1,0,2,2,0,0 +/docs/4.4/technical-details/release-notes/4.tucker/3.monkey,0,1,0,7,1,0,0 \ No newline at end of file diff --git a/sidebars.ts b/sidebars.ts deleted file mode 100644 index 9219ecf8..00000000 --- a/sidebars.ts +++ /dev/null @@ -1,43 +0,0 @@ -import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'; - -const sidebars: SidebarsConfig = { - docsSidebar: [ - { - type: 'doc', - id: 'index', - label: 'Harper Docs', - }, - { - type: 'category', - label: 'Developers', - link: { - type: 'generated-index', - title: 'Developer Documentation', - description: 'Comprehensive guides and references for building applications with HarperDB', - keywords: ['developers', 'api', 'applications'], - }, - items: [{ type: 'autogenerated', dirName: 'developers' }], - }, - { - type: 'category', - label: 'Administration', - items: [{ type: 'autogenerated', dirName: 'administration' }], - }, - { - type: 'category', - label: 'Deployments', - items: [{ type: 'autogenerated', dirName: 'deployments' }], - }, - { - type: 'category', - label: 'Reference', - link: { - type: 'doc', - id: 'reference/index', - }, - items: [{ type: 'autogenerated', dirName: 'reference' }], - }, - ], -}; - -export default sidebars; diff --git a/sidebarsReference.ts b/sidebarsReference.ts new file mode 100644 index 00000000..b386fdc6 --- /dev/null +++ b/sidebarsReference.ts @@ -0,0 +1,16 @@ +import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'; + 
+const sidebars: SidebarsConfig = { + referenceSidebar: [ + { + type: 'doc', + id: 'index', + label: 'Reference', + }, + { + type: 'autogenerated', dirName: '.' + } + ], +}; + +export default sidebars; diff --git a/src/pages/index.mdx b/src/pages/index.mdx new file mode 100644 index 00000000..1e975307 --- /dev/null +++ b/src/pages/index.mdx @@ -0,0 +1,55 @@ +--- +title: Welcome to Harper Documentation Site +--- + +import CustomDocCardList from '@site/src/components/CustomDocCardList'; + +# Harper Docs + +:::info + +### Get the Most Out of Harper + +Join our Discord to access expert support, collaborate with Harper's core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) +::: + +Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. + +Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. + +## Getting Started + +The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. 
+ +## Building with Harper + + diff --git a/versioned_sidebars/version-4.1-sidebars.json b/versioned_sidebars/version-4.1-sidebars.json deleted file mode 100644 index e9135be7..00000000 --- a/versioned_sidebars/version-4.1-sidebars.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Developer Documentation" - }, - { - "type": "category", - "label": "Install HarperDB", - "items": [ - { - "type": "autogenerated", - "dirName": "install-harperdb" - } - ] - }, - { - "type": "category", - "label": "Getting Started", - "items": [ - { - "type": "autogenerated", - "dirName": "getting-started" - } - ] - }, - { - "type": "link", - "label": "Full API Documentation", - "href": "https://api.harperdb.io/" - }, - { - "type": "category", - "label": "HarperDB Studio", - "items": [ - { - "type": "autogenerated", - "dirName": "harperdb-studio" - } - ] - }, - { - "type": "category", - "label": "HarperDB Cloud", - "items": [ - { - "type": "autogenerated", - "dirName": "harperdb-cloud" - } - ] - }, - { - "type": "category", - "label": "Security", - "items": [ - { - "type": "autogenerated", - "dirName": "security" - } - ] - }, - { - "type": "category", - "label": "Clustering", - "items": [ - { - "type": "autogenerated", - "dirName": "clustering" - } - ] - }, - { - "type": "category", - "label": "Custom Functions", - "items": [ - { - "type": "autogenerated", - "dirName": "custom-functions" - } - ] - }, - { - "type": "category", - "label": "Add-ons and SDKs", - "items": [ - { - "type": "autogenerated", - "dirName": "add-ons-and-sdks" - } - ] - }, - { - "type": "category", - "label": "SQL Guide", - "items": [ - { - "type": "autogenerated", - "dirName": "sql-guide" - } - ] - }, - "harperdb-cli", - "configuration", - "logging", - "transaction-logging", - "audit-logging", - "jobs", - "upgrade-hdb-instance", - { - "type": "category", - "label": "Reference", - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - }, - 
"support" - ] -} diff --git a/versioned_sidebars/version-4.2-sidebars.json b/versioned_sidebars/version-4.2-sidebars.json deleted file mode 100644 index f4ff0475..00000000 --- a/versioned_sidebars/version-4.2-sidebars.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - "getting-started", - { - "type": "category", - "label": "Developers", - "items": [ - { - "type": "autogenerated", - "dirName": "developers" - } - ] - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versioned_sidebars/version-4.3-sidebars.json b/versioned_sidebars/version-4.3-sidebars.json deleted file mode 100644 index f4ff0475..00000000 --- a/versioned_sidebars/version-4.3-sidebars.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - "getting-started", - { - "type": "category", - "label": "Developers", - "items": [ - { - "type": "autogenerated", - "dirName": "developers" - } - ] - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versioned_sidebars/version-4.4-sidebars.json b/versioned_sidebars/version-4.4-sidebars.json deleted file mode 100644 index 71b91fa4..00000000 --- 
a/versioned_sidebars/version-4.4-sidebars.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versioned_sidebars/version-4.5-sidebars.json b/versioned_sidebars/version-4.5-sidebars.json deleted file mode 100644 index c52613e4..00000000 --- a/versioned_sidebars/version-4.5-sidebars.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - { - "type": "category", - "label": "Developers", - "items": [ - { - "type": "autogenerated", - "dirName": "developers" - } - ] - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "link": { - "type": "doc", - "id": "reference/index" - }, - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versioned_sidebars/version-4.6-sidebars.json b/versioned_sidebars/version-4.6-sidebars.json deleted file mode 100644 index c52613e4..00000000 --- a/versioned_sidebars/version-4.6-sidebars.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - { - "type": "category", - "label": "Developers", - "items": [ - { - "type": "autogenerated", - "dirName": "developers" - } - ] - }, - { - "type": 
"category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "link": { - "type": "doc", - "id": "reference/index" - }, - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versioned_sidebars/version-4.7-sidebars.json b/versioned_sidebars/version-4.7-sidebars.json deleted file mode 100644 index 58b7ee57..00000000 --- a/versioned_sidebars/version-4.7-sidebars.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Harper Docs" - }, - { - "type": "category", - "label": "Developers", - "link": { - "type": "generated-index", - "title": "Developer Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers", "api", "applications"] - }, - "items": [ - { - "type": "autogenerated", - "dirName": "developers" - } - ] - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "autogenerated", - "dirName": "administration" - } - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "autogenerated", - "dirName": "deployments" - } - ] - }, - { - "type": "category", - "label": "Reference", - "link": { - "type": "doc", - "id": "reference/index" - }, - "items": [ - { - "type": "autogenerated", - "dirName": "reference" - } - ] - } - ] -} diff --git a/versions.json b/versions.json deleted file mode 100644 index 7bd6509f..00000000 --- a/versions.json +++ /dev/null @@ -1 +0,0 @@ -["4.7", "4.6", "4.5", "4.4", "4.3", "4.2", "4.1"] From 78eca4bed4630fd81f8f9328c7ed7e0e603a9589 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Wed, 18 Feb 2026 18:36:38 -0700 Subject: [PATCH 02/51] v4 docs reference plan 
init --- v4-docs-execution-procedure.md | 591 +++++++++++ v4-docs-implementation-plan.md | 546 +++++++++++ v4-docs-migration-map.md | 606 ++++++++++++ v4-docs-project-brief.md | 364 +++++++ v4-docs-reference-plan.md | 362 +++++++ v4-docs-research.md | 326 ++++++ v4-feature-history-ai-gen.md | 1692 ++++++++++++++++++++++++++++++++ 7 files changed, 4487 insertions(+) create mode 100644 v4-docs-execution-procedure.md create mode 100644 v4-docs-implementation-plan.md create mode 100644 v4-docs-migration-map.md create mode 100644 v4-docs-project-brief.md create mode 100644 v4-docs-reference-plan.md create mode 100644 v4-docs-research.md create mode 100644 v4-feature-history-ai-gen.md diff --git a/v4-docs-execution-procedure.md b/v4-docs-execution-procedure.md new file mode 100644 index 00000000..25181c1e --- /dev/null +++ b/v4-docs-execution-procedure.md @@ -0,0 +1,591 @@ +# Harper v4 Documentation Migration - Execution Procedure + +This document outlines the practical execution approach for implementing the v4 documentation migration defined in [v4-docs-implementation-plan.md](./v4-docs-implementation-plan.md). + +## Overview + +**Goal**: Use AI agents (Claude Code) to generate initial documentation drafts, then have humans review and refine. + +**Approach**: Start with a pilot section to validate the process, then scale to remaining sections. + +**Timeline**: Estimated 3-4 weeks total (1-2 days AI generation + 2-3 weeks human review) + +--- + +## Execution Strategy + +### Recommended Approach: VSCode Extension with Pilot + +We'll use Claude Code in VSCode to orchestrate the migration. This gives us: +- ✅ Full visibility and control over what's being generated +- ✅ Ability to course-correct between sections +- ✅ Easy local testing before pushing +- ✅ Familiar development workflow + +**Alternative considered**: Fully automated Agent SDK script. While this could generate all 20 PRs at once, we prefer the hybrid approach to validate quality first and maintain control. 
+ +### Three-Phase Approach + +**Phase 1: Pilot (2 sections)** +- Run CLI section migration to test the process +- Review output quality and completeness +- Run Security section as validation +- Refine prompts and procedures based on learnings + +**Phase 2: Scale (Remaining 18 sections)** +- Continue with VSCode approach for remaining sections +- Run 2-3 sections in parallel (multiple VSCode windows) +- Option to build Agent SDK automation if VSCode becomes tedious + +**Phase 3: Finalization** +- Resolve link placeholders +- Update cross-references +- Configure sidebars and redirects +- Final cleanup + +--- + +## Prerequisites + +### Environment Setup + +```bash +# Ensure on correct branch +git checkout major-version-reorg +git pull + +# Create directory for tracking metadata +mkdir -p migration-context/link-placeholders + +# Verify gh CLI is authenticated +gh auth status + +# Verify VSCode with Claude Code extension is installed and configured +``` + +### Required Files (Already Created) + +- ✅ `v4-docs-implementation-plan.md` - Detailed implementation instructions +- ✅ `v4-docs-migration-map.md` - Mapping of old → new paths +- ✅ `v4-docs-reference-plan.md` - Target structure and philosophy +- ✅ `v4-docs-research.md` - Manual research notes +- ✅ `v4-feature-history-ai-gen.md` - AI-generated feature history + +--- + +## Phase 1: Pilot Execution + +### Pilot Section 1: CLI + +**Why CLI first?** +- Relatively stable across versions +- Simple structure (4 files) +- Good test of the entire workflow +- Low risk if something goes wrong + +**Steps:** + +1. **Start Claude Code in VSCode** + - Open VSCode in the documentation repository + - Start a new Claude Code chat + +2. **Provide the prompt:** + ``` + I need you to migrate the CLI section following the implementation plan. 
+ + Context files to read: + - v4-docs-implementation-plan.md (Part 1: Initial Content Generation) + - v4-docs-migration-map.md (CLI Section) + - v4-docs-reference-plan.md (overall structure) + + Task: + 1. Read the CLI section entry from the migration map + 2. Read all source files listed (versioned_docs/version-4.7/deployments/harper-cli.md, etc.) + 3. Read release notes for version annotations + 4. Generate new files in reference_versioned_docs/version-v4/cli/ with: + - Inline source comments + - Version annotations with confidence levels + - Link placeholders for cross-references + 5. Create migration-context/link-placeholders/cli-link-placeholders.md + 6. Create branch: migration/cli + 7. Commit changes + 8. Open PR using the template from implementation plan + 9. Update v4-docs-migration-map.md status to "In Progress" + + Follow all agent instructions from Part 1 of the implementation plan. + ``` + +3. **Monitor the process:** + - Watch as Claude Code reads files and generates content + - Review generated files as they're created + - Check that inline source comments are present + - Verify branch and commit are created + +4. **Review the PR:** + - Check PR description follows template + - Verify all required sections are filled out + - Note quality of content, version annotations, placeholders + +5. **Document findings:** + - What worked well? + - What needs improvement? + - Any prompt refinements needed? + +### Pilot Section 2: Security + +**Why Security second?** +- More complex than CLI (8 files) +- Tests handling of cross-cutting concerns +- Validates the process scales beyond simple sections + +**Steps:** + +1. **Refine prompt based on CLI learnings** +2. **Run same process** with Security section +3. **Compare results** - is quality consistent? +4. 
**Decide on scaling approach:** + - If both pilots successful → continue with VSCode + - If quality issues → refine prompts, try again + - If tedious/repetitive → consider Agent SDK automation + +--- + +## Phase 2: Scale Execution + +### Batch Processing + +Organize remaining 18 sections into batches based on the implementation plan: + +**Batch 1: Simple sections (3 sections)** +- Content Types +- Headers +- GraphQL Querying + +**Batch 2: Medium complexity (7 sections)** +- Environment Variables +- Static Files +- HTTP +- MQTT +- Logging +- Analytics +- Studio + +**Batch 3: Complex sections (5 sections)** +- REST +- Replication +- Database +- Resources +- Components + +**Batch 4: Cross-cutting (2 sections)** +- Operations API +- Configuration + +**Batch 5: Legacy (1 section)** +- Legacy content + +### Parallel Execution + +**Option A: Sequential** +- Run one section at a time +- Safest approach +- Slower but easier to manage + +**Option B: Parallel (Recommended)** +- Open 2-3 VSCode windows +- Run 2-3 sections simultaneously +- Faster while maintaining control +- Can handle ~5 sections per day + +**Option C: Automated** +- Build Agent SDK script after successful pilots +- Generate all remaining PRs at once +- Fastest but less control + +### Prompt Template + +For each section, use this template (customize [PLACEHOLDERS]): + +``` +Migrate the [SECTION] section following the implementation plan. + +Context files: +- v4-docs-implementation-plan.md (Part 1 instructions) +- v4-docs-migration-map.md ([SECTION] Section entry) +- v4-docs-reference-plan.md (structure reference) + +Key details for this section: +- Output directory: reference_versioned_docs/version-v4/[section]/ +- Primary source: [PRIMARY_SOURCE_PATH from migration map] +- Additional sources: [LIST from migration map] +- Link placeholder tracker: migration-context/link-placeholders/[section]-link-placeholders.md + +Task: +1. Read the [SECTION] section entry from migration map +2. Read all source files +3. 
Read relevant release notes +4. Generate new reference files following the structure +5. Include inline source comments for traceability +6. Add version annotations with confidence levels +7. Use link placeholders for cross-references +8. Create link placeholder tracker +9. Create branch: migration/[section] +10. Commit with message: "docs: migrate [section] to v4 consolidated reference" +11. Open PR using the template +12. Update migration map status to "In Progress" + +Follow all Part 1 agent instructions carefully. +``` + +--- + +## Phase 3: Human Review Process + +### For Each PR + +**Review Checklist:** + +1. **Content Quality** + - [ ] Is the content accurate and complete? + - [ ] Does it make sense to a reader? + - [ ] Are examples clear and correct? + +2. **Version Annotations** + - [ ] Are version annotations present where appropriate? + - [ ] Do they match release notes/version comparisons? + - [ ] Are confidence levels noted (verified vs. inferred)? + +3. **Source Documentation** + - [ ] Are inline source comments present? + - [ ] Can we trace content back to original sources? + - [ ] Is the PR description complete? + +4. **Link Placeholders** + - [ ] Are placeholders in the correct format? + - [ ] Is the link tracker file created? + - [ ] Do placeholders make sense for targets? + +5. **Structure** + - [ ] Files in correct location (reference_versioned_docs/version-v4/)? + - [ ] Follows the structure from reference plan? + - [ ] No removal of versioned_docs content? + +### Review Workflow + +1. Reviewer assigned to PR +2. Reviewer goes through checklist +3. Reviewer edits content directly in PR (or requests changes) +4. Reviewer resolves any "needs verification" annotations +5. Reviewer handles image decisions (if any) +6. Reviewer approves and merges +7. Reviewer updates migration-map.md status to "Complete" +8. 
Reviewer checks off tracking issue + +### Review Velocity + +- Target: 2-3 PRs reviewed per day +- Simple sections: 30-60 minutes each +- Complex sections: 2-4 hours each +- Total review time: ~2-3 weeks + +--- + +## Phase 4: Post-Generation Cleanup + +After all sections are merged, run cleanup phases from implementation plan. + +### 4.1: Link Resolution + +**Using Claude Code:** + +``` +Resolve link placeholders following Part 3 of the implementation plan. + +Context: +- All migration-context/link-placeholders/*.md files +- All reference_versioned_docs/version-v4/ files + +Task: +1. Read all placeholder tracker files +2. Scan reference_versioned_docs/version-v4/ to see what exists +3. For each placeholder, replace TODO:path with correct relative path +4. Create PR(s) for link resolution (one per section recommended) +5. Flag any unresolvable links for human review + +Follow Part 3 instructions from implementation plan. +``` + +### 4.2: Cross-Reference Updates + +Update links in release_notes/ and learn/ content: + +``` +Update cross-references following Part 4 of the implementation plan. + +Task: +1. Scan release_notes/ for old documentation paths +2. Map to new paths using migration map +3. Update links +4. Create PR + +Do the same for learn/ content. +``` + +### 4.3: Sidebar Configuration + +``` +Create Docusaurus sidebar configuration following Part 5 of the implementation plan. + +Task: +1. Read reference plan outline for hierarchy +2. Scan reference_versioned_docs/version-v4/ for actual files +3. Generate sidebar JSON/JS following Docusaurus conventions +4. Ensure non-collapsible sections as noted in plan +5. Create PR +``` + +### 4.4: Redirects + +``` +Configure redirects following Part 6 of the implementation plan. + +Task: +1. Analyze existing redirects.ts +2. Use migration map to determine new paths +3. Generate redirect rules (prioritize most-visited pages) +4. Create PR +``` + +### 4.5: Final Cleanup + +**Human tasks:** +1. 
Review orphaned content (files not in migration map) +2. Remove old versioned_docs/version-4.X/ folders +3. Build docs locally and validate +4. Test redirects +5. Final spot-checks + +--- + +## Progress Tracking + +### GitHub Tracking Issue + +Create an issue titled "v4 Documentation Migration Progress Tracker" with this body: + +```markdown +Tracking migration of v4 documentation to consolidated structure. + +## Phase 1: Pilots +- [ ] #[PR] CLI (Pilot 1) +- [ ] #[PR] Security (Pilot 2) + +## Phase 2: Batch 1 - Simple +- [ ] #[PR] Content Types +- [ ] #[PR] Headers +- [ ] #[PR] GraphQL Querying + +## Phase 2: Batch 2 - Medium +- [ ] #[PR] Environment Variables +- [ ] #[PR] Static Files +- [ ] #[PR] HTTP +- [ ] #[PR] MQTT +- [ ] #[PR] Logging +- [ ] #[PR] Analytics +- [ ] #[PR] Studio + +## Phase 2: Batch 3 - Complex +- [ ] #[PR] REST +- [ ] #[PR] Replication +- [ ] #[PR] Database +- [ ] #[PR] Resources +- [ ] #[PR] Components + +## Phase 2: Batch 4 - Cross-cutting +- [ ] #[PR] Operations API +- [ ] #[PR] Configuration + +## Phase 2: Batch 5 - Legacy +- [ ] #[PR] Legacy Content + +## Phase 3: Cleanup +- [ ] Link resolution +- [ ] Cross-references updated +- [ ] Sidebars configured +- [ ] Redirects configured +- [ ] Old content removed + +## Phase 4: Finalization +- [ ] Final validation complete +- [ ] Merged to main +``` + +### Migration Map Status + +Update `v4-docs-migration-map.md` status field for each section: +- "In Progress" when PR is opened +- "Complete" when PR is merged + +--- + +## Team Roles + +### AI Agent (Claude Code) +- Generate initial content drafts +- Follow migration map and implementation plan +- Create branches, commits, PRs +- Track placeholders and sources + +### Human Reviewers +- Verify content accuracy +- Validate version annotations +- Edit and improve content +- Make final decisions on uncertainties +- Merge PRs + +### Project Lead +- Coordinate the migration +- Assign reviewers to PRs +- Monitor progress via tracking issue +- Make 
decisions on edge cases + +--- + +## Communication Plan + +### Kickoff Meeting +- Present this plan to the team +- Walk through pilot sections +- Assign initial reviewers +- Set expectations for review velocity + +### Weekly Syncs +- Review progress on tracking issue +- Discuss any blockers or issues +- Adjust approach if needed +- Assign upcoming reviews + +### Ad-hoc Communication +- Slack/Discord for quick questions +- PR comments for content-specific discussions +- Document any process improvements + +--- + +## Success Metrics + +- [ ] All 20 sections have PRs opened +- [ ] All PRs pass initial quality review +- [ ] 95%+ of version annotations verified +- [ ] All link placeholders resolved +- [ ] Documentation builds without errors +- [ ] Old versioned_docs removed +- [ ] Successfully merged to main + +--- + +## Risk Mitigation + +### Risk: AI generates incorrect content +**Mitigation**: +- Pilot sections first to validate quality +- Inline source documentation for traceability +- Human review on every PR +- Can always reference original sources + +### Risk: Process takes longer than expected +**Mitigation**: +- Flexible timeline (3-4 weeks is estimate) +- Can parallelize more aggressively if needed +- Can pause and adjust if blockers arise + +### Risk: Link placeholders are confusing +**Mitigation**: +- Clear format defined upfront +- Section-specific tracker files +- Separate cleanup phase dedicated to resolving them + +### Risk: Team capacity for reviews +**Mitigation**: +- Can adjust review velocity +- Can spread reviews over longer period +- Simple sections are quick to review + +--- + +## Decision Points + +### After Pilot Phase +**Decision**: Continue with VSCode or build Agent SDK automation? +- **If** pilots are successful and quality is good → Continue with VSCode +- **If** VSCode becomes tedious → Build Agent SDK script +- **If** quality issues → Refine prompts and retry + +### During Scale Phase +**Decision**: Sequential or parallel execution? 
+- **If** team has capacity → Run 2-3 sections in parallel +- **If** reviewers are overwhelmed → Slow down to sequential +- **If** going well → Scale up to more parallelization + +### Before Final Cleanup +**Decision**: Ready to remove old content? +- **If** all content migrated and verified → Proceed with removal +- **If** any uncertainties remain → Pause and resolve +- **If** redirects not ready → Complete redirects first + +--- + +## Next Steps + +1. **Review this plan with team** - Get feedback and buy-in +2. **Set up environment** - Ensure VSCode, Claude Code, gh CLI ready +3. **Create tracking issue** - Set up progress tracking +4. **Run pilot 1 (CLI)** - Execute and evaluate +5. **Team sync** - Review pilot results and decide on scaling approach +6. **Scale execution** - Continue with remaining sections +7. **Complete cleanup** - Final phases and merge to main + +--- + +## Questions for Team Discussion + +1. Who will be responsible for reviewing PRs? (Assign per section or per batch?) +2. What's our target review velocity? (How many PRs per day can we handle?) +3. Should we run pilots first, or are we confident enough to start scaling immediately? +4. Do we want to build Agent SDK automation, or stick with VSCode throughout? +5. Any concerns about the 3-4 week timeline? +6. Any sections that need special attention or domain expertise? 
+ +--- + +## Appendix: Troubleshooting + +### If Claude Code goes off track +- Stop the generation +- Review what it's done so far +- Refine the prompt with more specific guidance +- Restart with the refined prompt + +### If content quality is poor +- Check that Claude Code read the right source files +- Verify inline source comments are present +- Look at similar PRs to see if it's a pattern +- Refine the prompt to be more specific about quality expectations + +### If link placeholders are confusing +- Review the placeholder tracker file +- Check the format matches the specification +- Update the placeholder with more context/description +- Flag for human reviewer to fix in the PR + +### If version annotations are wrong +- Check release notes to verify +- Look at git history of source files +- Mark as "needs verification" and have human research +- Update in the review process + +### If merge conflicts occur +- Should be rare since we're only adding files +- If they happen, likely in migration-map.md or tracking files +- Resolve manually, prioritizing latest changes +- Consider using more granular tracker files per section diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md new file mode 100644 index 00000000..a17ccdf1 --- /dev/null +++ b/v4-docs-implementation-plan.md @@ -0,0 +1,546 @@ +# Harper v4 Documentation Migration Implementation Plan + +This document outlines the concrete steps for migrating Harper v4 documentation from `versioned_docs/version-4.X/` into a consolidated `reference_versioned_docs/version-v4/` structure as defined in [v4-docs-reference-plan.md](./v4-docs-reference-plan.md) and mapped in [v4-docs-migration-map.md](./v4-docs-migration-map.md). + +## Overview + +**Branch Strategy**: All work happens on `major-version-reorg` branch. Once complete, merge to `main` in one go. + +**Target Directory**: All new content goes into `reference_versioned_docs/version-v4/` (NOT `reference/`). 
After v4 migration is complete, we'll copy to `reference/` to kickstart v5 (out of scope for this plan). + +**Approach**: AI agents do initial content generation → humans review and edit → merge → repeat until complete → cleanup passes. + +--- + +## Part 1: Initial Content Generation (AI-Driven) + +### Overview +AI agents work through the migration map, creating PRs for each top-level section. All PRs are opened simultaneously from the same base commit. Each PR adds new files without removing anything from `versioned_docs/`. + +### Agent Instructions + +For each section in the migration map, the agent should: + +1. **Read the migration map entry** for the section +2. **Read all source files** listed in "Primary Source" and "Additional Sources" +3. **Read relevant release notes** from `release_notes/` for version annotations +4. **Generate new reference files** following the structure in the reference plan +5. **Add inline source comments** documenting what was used: + ```markdown + + + + ``` + +6. **Use link placeholders** for cross-references that don't exist yet: + ```markdown + [JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md "Will be created in security section") + ``` + +7. **Create section-specific link placeholder tracker**: + - Store in `migration-context/link-placeholders/` + - Named by section: `cli-link-placeholders.md`, `security-link-placeholders.md`, etc. + - Format: + ```markdown + # Link Placeholders for [Section Name] + + ## reference_versioned_docs/version-v4/[section]/[file].md + + - Line 45: `[JWT Auth](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md)` + - Context: Discussing authentication methods + - Target should be: Main JWT authentication reference page + + - Line 123: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` + - Context: Listing all available operations + - Target should be: Complete operations list + ``` + +8. 
**Add version annotations** using the strategy defined in reference plan: + ```markdown + ## Relationships + + Added in: v4.3.0 + + The `@relation` directive... + ``` + + **Include confidence levels**: + - "Added in: v4.3.0 (confirmed via release notes)" + - "Added in: v4.3.0 (inferred from version comparison, needs verification)" + - "Changed in: v4.4.0 (likely, needs human verification)" + +9. **Note conflicts and uncertainties** in PR description + +10. **Handle images/assets** with placeholders: + ```markdown + + + ![Architecture Diagram](TODO:IMAGE) + ``` + +11. **Create PR** with comprehensive description (template below) + +12. **Update migration-map.md** status to "In Progress" for that section + +### PR Description Template + +```markdown +# [Section Name] Migration + +## Summary +Migration of [section name] documentation from versioned_docs into new reference structure. + +## Files Created +- reference_versioned_docs/version-v4/[section]/overview.md +- reference_versioned_docs/version-v4/[section]/page1.md +- reference_versioned_docs/version-v4/[section]/page2.md + +## Source Files Used + +### reference_versioned_docs/version-v4/[section]/overview.md +- `versioned_docs/version-4.7/path/to/file.md` (primary source) +- `versioned_docs/version-4.2/path/to/file.md` (for baseline features) +- `release_notes/4.3.0.md` (feature introduction dates) + +### reference_versioned_docs/version-v4/[section]/page1.md +- `versioned_docs/version-4.7/path/to/another.md` (primary) +- ... + +## Version Annotations Added + +### High Confidence (Confirmed via release notes) +- Feature X: Added in v4.3.0 +- Feature Y: Changed in v4.4.0 + +### Needs Verification +- Feature Z: Likely added in v4.3.0 (inferred from version comparison) +- Config option ABC: Possibly changed in v4.5.0 (mentioned in docs but not in release notes) + +## Link Placeholders Created +See `migration-context/link-placeholders/[section]-link-placeholders.md` for complete list. 
+ +Summary: +- 12 placeholders to operations-api section +- 5 placeholders to security section +- 3 placeholders to configuration section + +## Images/Assets Noted +- Line 45 of overview.md: TODO-IMAGE for architecture diagram +- Line 123 of page1.md: TODO-IMAGE for flow chart + +## Conflicts & Questions for Human Review + +### Content Conflicts +None (reference/ directory was reset) + +### Uncertainties +- Unclear if Feature Z was introduced in v4.3.0 or v4.4.0 - marked for verification +- Configuration option `foo.bar` mentioned in v4.5 docs but not in earlier versions or release notes + +## Migration Map Status +Updated status for this section to "In Progress" + +## Checklist for Human Reviewer +- [ ] Verify version annotations marked as "needs verification" +- [ ] Review content accuracy and completeness +- [ ] Check inline source comments are accurate +- [ ] Decide on image/asset handling +- [ ] Ensure link placeholders make sense +- [ ] Update migration-map.md status to "Complete" after merge +``` + +### Sections to Migrate (In Order of Priority) + +Based on migration map, recommend this order: + +**Phase 1A - Simple, Stable Sections (Parallel PRs)** +1. CLI (`reference_versioned_docs/version-v4/cli/`) +2. Content Types (`reference_versioned_docs/version-v4/rest/content-types.md`) +3. Headers (`reference_versioned_docs/version-v4/rest/headers.md`) +4. GraphQL Querying (`reference_versioned_docs/version-v4/graphql-querying/`) +5. Studio (`reference_versioned_docs/version-v4/studio/`) + +**Phase 1B - Medium Complexity (Parallel PRs)** +6. Security (`reference_versioned_docs/version-v4/security/`) +7. Environment Variables (`reference_versioned_docs/version-v4/environment-variables/`) +8. Static Files (`reference_versioned_docs/version-v4/static-files/`) +9. HTTP (`reference_versioned_docs/version-v4/http/`) +10. MQTT (`reference_versioned_docs/version-v4/mqtt/`) +11. Logging (`reference_versioned_docs/version-v4/logging/`) +12. 
Analytics (`reference_versioned_docs/version-v4/analytics/`) + +**Phase 1C - Complex Sections (Parallel PRs, expect longer review)** +13. REST (`reference_versioned_docs/version-v4/rest/`) +14. Replication (`reference_versioned_docs/version-v4/replication/`) +15. Database (`reference_versioned_docs/version-v4/database/`) +16. Resources (`reference_versioned_docs/version-v4/resources/`) +17. Components (`reference_versioned_docs/version-v4/components/`) + +**Phase 1D - Cross-Cutting Sections (After others to minimize placeholders)** +18. Operations API (`reference_versioned_docs/version-v4/operations-api/`) +19. Configuration (`reference_versioned_docs/version-v4/configuration/`) + +**Phase 1E - Legacy Content (Simple moves)** +20. Legacy (`reference_versioned_docs/version-v4/legacy/`) + +### Progress Tracking + +Create GitHub issue to track progress: + +**Title**: "v4 Documentation Migration Progress Tracker" + +**Body**: +```markdown +Tracking migration of v4 documentation to consolidated structure. 
+ +## Phase 1A - Simple Sections +- [ ] #[PR] CLI +- [ ] #[PR] Content Types +- [ ] #[PR] Headers +- [ ] #[PR] GraphQL Querying +- [ ] #[PR] Studio + +## Phase 1B - Medium Complexity +- [ ] #[PR] Security +- [ ] #[PR] Environment Variables +- [ ] #[PR] Static Files +- [ ] #[PR] HTTP +- [ ] #[PR] MQTT +- [ ] #[PR] Logging +- [ ] #[PR] Analytics + +## Phase 1C - Complex Sections +- [ ] #[PR] REST +- [ ] #[PR] Replication +- [ ] #[PR] Database +- [ ] #[PR] Resources +- [ ] #[PR] Components + +## Phase 1D - Cross-Cutting +- [ ] #[PR] Operations API +- [ ] #[PR] Configuration + +## Phase 1E - Legacy +- [ ] #[PR] Legacy Content + +## Part 3 - Link Resolution +- [ ] Links resolved + +## Part 4 - Cross-References +- [ ] Release notes updated +- [ ] Learn guides updated + +## Parts 5-7 - Finalization +- [ ] Sidebars created +- [ ] Old content removed +- [ ] Redirects configured +``` + +After each PR is created, agent adds comment to this issue: +```markdown +Created PR #123 for [Section Name] migration +- Files: X created +- Placeholders: Y links need resolution +- Status: Awaiting human review +``` + +--- + +## Part 2: Human Review & Merge + +### For Each PR + +1. **Human reviews PR** using checklist in PR description +2. **Human edits content** as needed: + - Verify version annotations + - Improve writing/clarity + - Resolve uncertainties + - Handle image decisions +3. **Human approves and merges PR** +4. **Human updates migration-map.md** status to "Complete" +5. **Human checks off tracking issue** + +--- + +## Part 3: Link Resolution (AI-Driven) + +Once all Part 1 PRs are merged, resolve link placeholders. + +### Agent Instructions + +1. **Read all `migration-context/link-placeholders/*.md` files** +2. **Scan all `reference_versioned_docs/version-v4/` files** to build index of what exists +3. 
**For each placeholder**: + - Determine whether the target file exists + - If it exists: replace `TODO:path` with the actual relative path + - If it doesn't exist: flag it for human review (it may be a typo in the original plan)
+ +**Agent Instructions**: +1. Scan all files in `learn/` (or wherever learn content lives) +2. Find links to old reference paths +3. Map to new paths +4. Create PR with updates + +### 4.3: Other Documentation + +**Task**: Find and update any other references to old paths. + +**Agent Instructions**: +1. Search entire repo for common old path patterns +2. Update as appropriate +3. Create PR with updates + +--- + +## Part 5: Sidebar Configuration (AI-Assisted) + +Create Docusaurus sidebar configuration for new structure. + +### Agent Instructions + +1. **Read the reference plan outline** to understand hierarchy +2. **Scan `reference_versioned_docs/version-v4/`** to see what actually exists +3. **Generate sidebar JSON/JS** following Docusaurus conventions: + ```javascript + { + type: 'category', + label: 'CLI', + items: [ + 'cli/overview', + 'cli/commands', + 'cli/operations-api-commands', + 'cli/authentication' + ] + } + ``` +4. **Follow existing sidebar patterns** from current docs +5. **Ensure non-collapsible sections** (as noted in reference plan) +6. **Create PR** with sidebar configuration + +--- + +## Part 6: Redirects Configuration (AI-Assisted) + +Configure redirects from old paths to new paths. + +### Agent Instructions + +1. **Analyze existing `redirects.ts`** (or wherever redirects are configured) +2. **Read sitemap** (if available) for list of old paths +3. **Use migration map** to determine new paths for old URLs +4. **Generate redirect rules**: + - Perfect redirects for mapped pages + - Catch-all redirects for unmapped pages (to appropriate section overview) +5. **Create PR** with redirect configuration + +### Redirect Priority + +Focus on: +1. Most visited pages (if analytics data available) +2. All `/docs/4.7/` paths (current latest) +3. Common paths across v4.2-v4.6 (many are duplicates) +4. Catch-all for everything else + +--- + +## Part 7: Cleanup & Finalization + +### 7.1: Orphaned Content Review + +**Human Task**: +1. 
Review "Files Being Removed" section in migration map +2. Confirm these files are intentionally not migrated +3. Document decision (move to legacy, move to learn, delete entirely) + +### 7.2: Remove Old Content + +**After all above steps complete**: +1. Create PR that removes old `versioned_docs/version-4.X/` folders +2. Only do this after confirming: + - All content is migrated or intentionally deprecated + - All orphaned content is accounted for + - Redirects are working + - Sidebars are updated + +### 7.3: Final Validation + +**Human Task**: +1. Build documentation locally +2. Spot check various pages +3. Test redirects +4. Verify no broken links +5. Check version annotations make sense + +### 7.4: Merge to Main + +Once everything on `major-version-reorg` branch is complete: +1. Final review of entire branch +2. Squash/organize commits if needed +3. Merge to `main` +4. Deploy + +--- + +## Agent Configuration Summary + +### Files Agents Should Reference + +**Primary**: +- `v4-docs-migration-map.md` - The authoritative source for what goes where +- `v4-docs-reference-plan.md` - Understanding structure and philosophy +- `versioned_docs/version-4.X/**/*.md` - Source content +- `release_notes/*.md` - Version annotation validation + +**Secondary**: +- `v4-docs-research.md` - Manual research notes +- `v4-feature-history-ai-gen.md` - AI-generated feature history (use with caution) + +### Agent Constraints + +**DO**: +- Add new files to `reference_versioned_docs/version-v4/` +- Include inline source comments +- Use link placeholders with TODO: prefix +- Create section-specific link placeholder trackers +- Add version annotations with confidence levels +- Flag uncertainties for human review +- Update migration-map.md status + +**DO NOT**: +- Remove anything from `versioned_docs/` (wait until Part 7) +- Add files to `reference/` (that's for v5 later) +- Guess at version annotations without noting confidence +- Skip inline source documentation +- Make assumptions about 
image handling without flagging + +### Link Placeholder Format + +**Standard format**: +```markdown +[Link Text](TODO:reference_versioned_docs/version-v4/section/page.md "Optional description of expected target") +``` + +**For images**: +```markdown + + +![Alt text](TODO:IMAGE) +``` + +### Version Annotation Format + +**High confidence**: +```markdown +Added in: v4.3.0 +``` + +**Needs verification**: +```markdown +Added in: v4.3.0 (inferred from version comparison, needs verification) +``` + +**Changed features**: +```markdown +Changed in: v4.4.0 + +[Describe the change] +In previous versions: [Describe old behavior] +``` + +**Deprecated features**: +```markdown +Deprecated in: v4.X.0 (moved to legacy in v4.7+) + +[Feature] is still supported but discouraged. See [alternative] for modern approach. +``` + +--- + +## Success Criteria + +- [ ] All sections from migration map have PRs created +- [ ] All PRs reviewed and merged by humans +- [ ] All link placeholders resolved +- [ ] Cross-references in release_notes and learn updated +- [ ] Sidebars configured +- [ ] Redirects configured +- [ ] Old versioned_docs removed +- [ ] Documentation builds without errors +- [ ] Spot checks confirm accuracy +- [ ] Branch merged to main + +--- + +## Estimated Timeline + +- **Part 1** (AI generation): Agents can work in parallel, ~1-2 days for all PRs +- **Part 2** (Human review): Depends on reviewer availability, estimate 1-2 weeks +- **Part 3** (Link resolution): ~1 day for agent work + ~2-3 days human review +- **Part 4** (Cross-references): ~1 day for agent work + ~1 day human review +- **Part 5** (Sidebars): ~1 day total +- **Part 6** (Redirects): ~1-2 days total +- **Part 7** (Cleanup): ~2-3 days total + +**Total estimated**: 3-4 weeks (heavily dependent on human review throughput) + +--- + +## Notes + +- Keep `versioned_docs/` intact throughout process as source of truth +- All work on `major-version-reorg` branch +- Human review is critical - AI does heavy lifting, humans 
This document maps existing documentation paths from `versioned_docs/version-4.X/` and `reference/` to the new consolidated reference structure defined in [v4-docs-reference-plan.md](./v4-docs-reference-plan.md). Note: the target paths below are abbreviated as `reference/...` for readability; during this migration, new files are created under `reference_versioned_docs/version-v4/` (the `reference/` folder itself is reserved for v5).
`versioned_docs/version-4.7/deployments/harper-cli.md` +- **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) +- **Version Annotations**: Note v4.3.0 introduction +- **Status**: Not Started + +### `reference/cli/authentication.md` +- **Primary Source**: New content or extract from CLI docs +- **Status**: Not Started + +--- + +## Configuration Section + +### `reference/configuration/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` +- **Additional Sources**: + - Current `reference/configuration.md` + - `versioned_docs/version-4.1/configuration.md` (baseline) +- **Status**: Not Started + +### `reference/configuration/options.md` +- **Primary Source**: Current `reference/configuration.md` (very comprehensive) +- **Additional Sources**: Compare all version-X/deployments/configuration.md files +- **Merge Required**: Yes - configuration options added across versions +- **Version Annotations**: Each config option needs version introduced +- **Status**: Not Started +- **Notes**: This will be a large migration task - the current configuration.md is 59KB + +### `reference/configuration/operations.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/configuration.md` +- **Additional Sources**: Earlier versions for feature evolution +- **Version Annotations**: Track when ops were added +- **Status**: Not Started + +--- + +## Operations API Section + +### `reference/operations-api/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/index.md` +- **Additional Sources**: + - `versioned_docs/version-4.2/developers/operations-api/index.md` (first structured ops api section) +- **Status**: Not Started + +### `reference/operations-api/operations.md` +- **Primary Source**: Synthesize from all `versioned_docs/version-4.7/developers/operations-api/*.md` files +- **Merge Required**: Yes - comprehensive list linking to primary references +- 
**Version Annotations**: Each operation needs version introduced +- **Status**: Not Started +- **Notes**: This should be a simplified reference table/list with links to detailed docs in feature sections + +--- + +## Security Section + +### `reference/security/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/index.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/security/configuration.md` +- **Status**: Not Started + +### `reference/security/basic-authentication.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/basic-auth.md` +- **Additional Sources**: `versioned_docs/version-4.1/security/basic-authentication.md` +- **Version Annotations**: Available since v4.1.0 +- **Status**: Not Started + +### `reference/security/jwt-authentication.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/jwt-auth.md` +- **Additional Sources**: `versioned_docs/version-4.1/security/jwt.md` +- **Version Annotations**: Available since v4.1.0 +- **Status**: Not Started + +### `reference/security/mtls-authentication.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/mtls-auth.md` +- **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` +- **Version Annotations**: Added in v4.3.0 +- **Status**: Not Started + +### `reference/security/certificate-management.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-management.md` +- **Additional Sources**: + - `versioned_docs/version-4.1/security/certificate-management.md` + - `versioned_docs/version-4.4+` (dynamic cert management added) +- **Merge Required**: Yes - dynamic certificate management added in v4.4 +- **Version Annotations**: Dynamic certs added v4.4.0 +- **Status**: Not Started + +### `reference/security/certificate-verification.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-verification.md` +- 
**Version Annotations**: Added in v4.7.0 (OCSP support) +- **Status**: Not Started + +### `reference/security/cors.md` +- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/configuration.md` +- **Status**: Not Started + +### `reference/security/ssl.md` +- **Primary Source**: Extract from security/configuration or certificate management docs +- **Status**: Not Started + +### `reference/security/users-and-roles.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/security/users-and-roles.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/operations-api/users-and-roles.md` + - `versioned_docs/version-4.7/reference/roles.md` + - Current `reference/defining-roles.md` +- **Merge Required**: Yes - content spread across multiple files +- **Status**: Not Started + +--- + +## Components Section + +### `reference/components/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/components/index.md` +- **Additional Sources**: + - `versioned_docs/version-4.1/custom-functions/*` (for evolution context) + - `versioned_docs/version-4.2/developers/applications/index.md` + - Current `reference/components/index.md` +- **Merge Required**: Yes - tell the evolution story (custom functions → components → applications/extensions → plugins) +- **Version Annotations**: + - Custom Functions: v4.1.0 + - Components concept: v4.2.0 + - Applications/Extensions: v4.3.0+ + - Plugin API: v4.6.0 +- **Status**: Not Started +- **Notes**: This is a critical page that explains the evolution + +### `reference/components/applications.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/components/applications.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/applications/*.md` + - Current `reference/components/applications.md` +- **Merge Required**: Yes - application developer docs scattered across multiple files +- **Status**: Not Started + +### `reference/components/extension-api.md` 
+- **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` +- **Additional Sources**: Current `reference/components/extensions.md` +- **Version Annotations**: Extension API formalized around v4.4-4.5 +- **Status**: Not Started + +### `reference/components/plugin-api.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` +- **Additional Sources**: Current `reference/components/plugins.md` +- **Version Annotations**: Added in v4.6.0 +- **Status**: Not Started + +--- + +## Database Section + +### `reference/database/overview.md` +- **Primary Source**: New content synthesizing how database system works +- **Additional Sources**: + - `versioned_docs/version-4.7/reference/architecture.md` + - Current `reference/architecture.md` +- **Status**: Not Started +- **Notes**: Should explain Resources + Schema + Auto-REST relationship + +### `reference/database/schema.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/applications/defining-schemas.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/reference/data-types.md` + - `versioned_docs/version-4.7/reference/dynamic-schema.md` + - Current `reference/defining-schemas.md` + - Current `reference/data-types.md` + - Current `reference/dynamic-schema.md` + - `versioned_docs/version-4.7/reference/blob.md` + - Current `reference/blob.md` + - Vector docs (if exists) +- **Merge Required**: Yes - comprehensive schema system documentation +- **Version Annotations**: + - Basic schemas: v4.2.0 + - Relations (@relation): v4.3.0 + - Computed properties: v4.4.0 + - Blob storage: v4.5.0 + - Vector indexing: v4.6.0 +- **Status**: Not Started +- **Notes**: Large consolidation - may want to keep blobs/vectors separate + +### `reference/database/data-loader.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/applications/data-loader.md` +- **Additional Sources**: Current `reference/data-loader.md` +- **Version Annotations**: Added in v4.5.0 +- 
**Status**: Not Started + +### `reference/database/storage-algorithm.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/storage-algorithm.md` +- **Additional Sources**: Current `reference/storage-algorithm.md` +- **Status**: Not Started + +### `reference/database/jobs.md` +- **Primary Source**: `versioned_docs/version-4.7/administration/jobs.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/operations-api/jobs.md` + - `versioned_docs/version-4.7/developers/operations-api/bulk-operations.md` +- **Merge Required**: Yes - jobs/bulk operations content scattered +- **Status**: Not Started + +### `reference/database/system-tables.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` +- **Additional Sources**: Current `reference/analytics.md` +- **Status**: Not Started +- **Notes**: System tables for analytics and other features + +### `reference/database/compaction.md` +- **Primary Source**: `versioned_docs/version-4.7/administration/compact.md` +- **Additional Sources**: Current `reference/compact.md` +- **Version Annotations**: Added in v4.3.0 +- **Status**: Not Started + +### `reference/database/transaction.md` +- **Primary Source**: `versioned_docs/version-4.7/administration/logging/transaction-logging.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/administration/logging/audit-logging.md` + - `versioned_docs/version-4.1/transaction-logging.md` + - `versioned_docs/version-4.1/audit-logging.md` +- **Merge Required**: Yes - combines audit and transaction logging +- **Version Annotations**: Transaction logging available since v4.1.0, audit logging since v4.1.0 +- **Status**: Not Started +- **Notes**: Consolidated from separate audit and transaction logging pages + +--- + +## Resources Section + +### `reference/resources/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` +- **Additional Sources**: Current `reference/resources/` folder +- **Status**: 
Not Started + +### `reference/resources/resource-api.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/reference/resources/instance-binding.md` + - `versioned_docs/version-4.7/reference/resources/migration.md` + - Current `reference/resources/index.md` + - Current `reference/resources/instance-binding.md` +- **Merge Required**: Yes - Resource API has two forms (with/without loadAsInstance) +- **Version Annotations**: + - Basic Resource API: v4.2.0 + - loadAsInstance changes: v4.4.0+ + - Response objects: v4.4.0 +- **Status**: Not Started + +### `reference/resources/global-apis.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` +- **Additional Sources**: + - `versioned_docs/version-4.7/reference/transactions.md` + - Current `reference/globals.md` + - Current `reference/transactions.md` +- **Merge Required**: Yes - consolidate global APIs (tables, databases, transactions, etc.) 
+- **Version Annotations**: Various APIs added across versions +- **Status**: Not Started +- **Notes**: Should reference out to http/api.md for `server` global + +### `reference/resources/query-optimization.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/resources/query-optimization.md` +- **Additional Sources**: Current `reference/resources/query-optimization.md` +- **Status**: Not Started + +--- + +## Environment Variables Section + +### `reference/environment-variables/overview.md` +- **Primary Source**: New content about `loadEnv` plugin +- **Additional Sources**: Built-in extensions docs, configuration docs +- **Version Annotations**: loadEnv added in v4.5.0 +- **Status**: Not Started + +### `reference/environment-variables/configuration.md` +- **Primary Source**: Extract from configuration docs or components docs +- **Status**: Not Started + +--- + +## Static Files Section + +### `reference/static-files/overview.md` +- **Primary Source**: Extract from built-in plugins/extensions documentation +- **Additional Sources**: Current `reference/components/built-in-extensions.md` +- **Status**: Not Started + +### `reference/static-files/configuration.md` +- **Primary Source**: Extract from configuration docs +- **Status**: Not Started + +--- + +## HTTP Section + +### `reference/http/overview.md` +- **Primary Source**: New content about HTTP server +- **Additional Sources**: Configuration docs, architecture docs +- **Status**: Not Started + +### `reference/http/configuration.md` +- **Primary Source**: Extract from `reference/configuration.md` (http section) +- **Version Annotations**: + - HTTP/2 support: v4.5.0 +- **Status**: Not Started + +### `reference/http/api.md` +- **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (server global) +- **Additional Sources**: Current `reference/globals.md` +- **Version Annotations**: + - server.authenticateUser: v4.5.0 +- **Status**: Not Started + +--- + +## REST Section + +### 
`reference/rest/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/rest.md` +- **Additional Sources**: Current `reference/rest.md` +- **Status**: Not Started + +### `reference/rest/querying.md` +- **Primary Source**: Extract from REST docs and NoSQL operations +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/operations-api/nosql-operations.md` +- **Version Annotations**: + - Null indexing/querying: v4.3.0 + - URL path improvements: v4.5.0 +- **Status**: Not Started + +### `reference/rest/headers.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/headers.md` +- **Additional Sources**: Current `reference/headers.md` +- **Version Annotations**: Track which headers were added/removed over versions +- **Status**: Not Started + +### `reference/rest/content-types.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/content-types.md` +- **Additional Sources**: Current `reference/content-types.md` +- **Status**: Not Started + +### `reference/rest/websockets.md` +- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` +- **Additional Sources**: Current `reference/real-time.md` +- **Status**: Not Started + +### `reference/rest/server-sent-events.md` +- **Primary Source**: Extract from real-time or REST docs +- **Status**: Not Started + +--- + +## MQTT Section + +### `reference/mqtt/overview.md` +- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` +- **Additional Sources**: Built-in plugins/extensions docs +- **Version Annotations**: + - MQTT features: v4.2.0+ + - mTLS support: v4.3.0 + - Single-level wildcards: v4.3.0 + - CRDT: v4.3.0 +- **Status**: Not Started + +### `reference/mqtt/configuration.md` +- **Primary Source**: Extract from configuration docs and real-time docs +- **Version Annotations**: Port change v4.5.0 (9925 → 9933) +- **Status**: Not Started + +--- + +## Logging Section + +### `reference/logging/overview.md` +- **Primary 
Source**: `versioned_docs/version-4.7/administration/logging/index.md` +- **Additional Sources**: Current `reference/logging.md` (if exists) +- **Status**: Not Started + +### `reference/logging/configuration.md` +- **Primary Source**: Extract from configuration docs +- **Version Annotations**: + - Per-component logging: v4.6.0 + - Granular configuration: v4.6.0 +- **Status**: Not Started + +### `reference/logging/api.md` +- **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) +- **Status**: Not Started + +### `reference/logging/operations.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` +- **Status**: Not Started +- **Notes**: Operations for managing standard logs (not transaction/audit logs, which moved to database section) + +--- + +## Analytics Section + +### `reference/analytics/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` +- **Additional Sources**: Current `reference/analytics.md` +- **Version Annotations**: + - Resource analytics: v4.5.0 + - Storage analytics: v4.5.0 +- **Status**: Not Started + +### `reference/analytics/operations.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` +- **Status**: Not Started + +--- + +## Replication Section + +### `reference/replication/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/replication/index.md` +- **Additional Sources**: Current `reference/replication/` (if exists) +- **Version Annotations**: + - Native Replication (Plexus): v4.4.0 +- **Status**: Not Started + +### `reference/replication/clustering.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/clustering/index.md` +- **Additional Sources**: + - All `versioned_docs/version-4.7/reference/clustering/*.md` files + - `versioned_docs/version-4.7/developers/operations-api/clustering.md` + - Current `reference/clustering/` folder +- **Merge Required**: Yes 
- extensive clustering documentation needs consolidation +- **Status**: Not Started +- **Notes**: Large section with many sub-pages + +### `reference/replication/sharding.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/replication/sharding.md` +- **Version Annotations**: + - Sharding: v4.4.0 + - Expanded functionality: v4.5.0 +- **Status**: Not Started + +--- + +## GraphQL Querying Section + +### `reference/graphql-querying/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/reference/graphql.md` +- **Additional Sources**: Current `reference/graphql.md` +- **Version Annotations**: + - Added: v4.4.0 (experimental) + - Disabled by default: v4.5.0 +- **Status**: Not Started +- **Notes**: Mark as experimental/incomplete + +--- + +## Studio Section + +### `reference/studio/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/index.md` +- **Additional Sources**: All harper-studio/*.md files +- **Merge Required**: Maybe - consolidate or keep nested? 
+- **Status**: Not Started +- **Notes**: May want to keep as nested folder or consolidate into single page + +--- + +## Legacy Section + +### `reference/legacy/cloud/` +- **Primary Source**: `versioned_docs/version-4.7/deployments/harper-cloud/*` +- **Status**: N/A +- **Notes**: Move entire folder as-is, add deprecation notice + +### `reference/legacy/custom-functions/` +- **Primary Source**: `versioned_docs/version-4.1/custom-functions/*` +- **Additional Sources**: `versioned_docs/version-4.7/developers/operations-api/custom-functions.md` +- **Status**: N/A +- **Notes**: Move as-is with deprecation notice pointing to Components + +### `reference/legacy/sql/` +- **Primary Source**: `versioned_docs/version-4.7/reference/sql-guide/*` +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/operations-api/sql-operations.md` + - Current `reference/sql-guide/` +- **Status**: N/A +- **Notes**: Move entire section as-is with deprecation notice + +### `reference/legacy/fastify-routes/` +- **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` +- **Additional Sources**: Current `reference/define-routes.md` +- **Status**: N/A +- **Notes**: Deprecated in favor of modern routing + +--- + +## Files Requiring Special Attention + +### High Priority Merges +These files require careful merging from multiple sources: + +1. **Configuration Options** (`reference/configuration/options.md`) + - Current `reference/configuration.md` is comprehensive (59KB) + - Need to track every config option's version introduction + - Consider automated script to compare config files across versions + +2. **Schema System** (`reference/database/schema.md`) + - Merges: data-types, dynamic-schema, defining-schemas, blobs, vectors + - Significant evolution across v4.2 → v4.6 + - May want to split into multiple pages + +3. 
**Components Evolution** (`reference/components/overview.md`) + - Must tell the full story: custom functions → components → apps → plugins + - Critical for user understanding + +4. **Clustering** (`reference/replication/clustering.md`) + - 10+ files in current clustering/ folder + - Extensive operations APIs + - Significant changes between NATS and native replication + +5. **Resource API** (`reference/resources/resource-api.md`) + - Two flavors (instance-binding vs not) + - Migration path complex + - Significant API changes in v4.4 + +### Files Being Removed +These exist in current docs but won't exist in new structure: + +- `versioned_docs/version-4.7/administration/administration.md` - Generic admin intro +- `versioned_docs/version-4.7/administration/cloning.md` - Move to Learn guide +- `versioned_docs/version-4.7/developers/applications/debugging.md` - Move to Learn guide +- `versioned_docs/version-4.7/developers/applications/caching.md` - Move to Learn guide +- `versioned_docs/version-4.7/developers/applications/web-applications.md` - Move to Learn guide +- `versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md` - Move to Learn guide +- `versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md` - Move to Learn guide +- `versioned_docs/version-4.7/deployments/install-harper/*` - Move to Learn guides +- `versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md` - Move to Learn guide +- `versioned_docs/version-4.7/reference/index.md` - Generic intro page +- `versioned_docs/version-4.7/reference/limits.md` - Fold into database/overview or schema + +### Cross-References to Update +Files that heavily reference paths that will change: + +- All operations-api/*.md files reference other sections +- Security files cross-reference operations and configuration +- Components files reference configuration and operations +- Clustering files extensively cross-reference + +--- + +## Migration Workflow Recommendations + +1. 
**Start with stable, simple sections** (CLI, Content Types, Headers) +2. **Then tackle medium complexity** (Security, Logging, MQTT) +3. **Save complex merges for later** (Configuration, Schema, Components, Clustering) +4. **Move legacy content last** (SQL, Cloud, Custom Functions) + +## Version Annotation Checklist + +For each file migrated, ensure: +- [ ] Features note their introduction version +- [ ] Changed behaviors note the version they changed +- [ ] Deprecated features note deprecation version +- [ ] Configuration options include "Added in:" notes +- [ ] Operations APIs include "Added in:" notes +- [ ] Links to related version-specific content + +## Notes + +- Many current `reference/` files appear to already be partially reorganized +- The `versioned_docs/` folders contain the historical record +- Compare git history to validate when features were actually introduced +- Use release notes to cross-reference feature versions +- Consider scripting the version annotation process for configuration options diff --git a/v4-docs-project-brief.md b/v4-docs-project-brief.md new file mode 100644 index 00000000..20d72ea7 --- /dev/null +++ b/v4-docs-project-brief.md @@ -0,0 +1,364 @@ +# Harper v4 Documentation Migration - Project Brief + +**Last Updated**: 2026-02-18 +**Status**: Planning Complete - Ready to Execute +**Phase**: Pre-Pilot +**Branch**: `major-version-reorg` + +--- + +## Executive Summary + +We are consolidating Harper v4 documentation from seven versioned folders (v4.1 through v4.7) into a single, unified v4 reference using inline version annotations (Node.js style). Simultaneously, we're reorganizing from role-based categories ("Developers," "Administration") to a flat, feature-based structure (CLI, Database, REST, Components, etc.). + +This consolidation will improve documentation maintainability, make features more discoverable, and establish a strong foundation for v5 documentation. 
The migration involves 20 top-level sections, ~100+ individual pages, and will be executed using AI agents (Claude Code) for initial content generation with human review and refinement. + +**Target**: `reference_versioned_docs/version-v4/` (NOT `reference/` - that's for v5 later) + +--- + +## Quick Links + +- **[Reference Plan](./v4-docs-reference-plan.md)** - Target structure and philosophy (the "what" and "why") +- **[Migration Map](./v4-docs-migration-map.md)** - Detailed file-by-file mapping (the "where") +- **[Implementation Plan](./v4-docs-implementation-plan.md)** - Technical specifications for agents (the "how") +- **[Execution Procedure](./v4-docs-execution-procedure.md)** - Team workflow and process (the "who/when") +- **[Research Notes](./v4-docs-research.md)** - Manual research on feature evolution +- **[AI Feature History](./v4-feature-history-ai-gen.md)** - AI-generated feature timeline (use with caution) + +--- + +## Current Status + +### Phase Status +- ✅ Planning & Documentation Complete +- ⏸️ Team Review Pending +- ⏳ Pilot Execution Not Started +- ⏳ Scale Execution Not Started +- ⏳ Cleanup Not Started + +### Sections Status (0/20 Complete) + +**Phase 1A - Simple** (0/5) +- [ ] CLI +- [ ] Content Types +- [ ] Headers +- [ ] GraphQL Querying +- [ ] Studio + +**Phase 1B - Medium** (0/7) +- [ ] Security +- [ ] Environment Variables +- [ ] Static Files +- [ ] HTTP +- [ ] MQTT +- [ ] Logging +- [ ] Analytics + +**Phase 1C - Complex** (0/5) +- [ ] REST +- [ ] Replication +- [ ] Database +- [ ] Resources +- [ ] Components + +**Phase 1D - Cross-Cutting** (0/2) +- [ ] Operations API +- [ ] Configuration + +**Phase 1E - Legacy** (0/1) +- [ ] Legacy Content + +### Metrics +- **PRs Opened**: 0/20 +- **PRs Merged**: 0/20 +- **Link Placeholders Created**: 0 +- **Link Placeholders Resolved**: 0 +- **Days Elapsed**: 0 +- **Estimated Days Remaining**: 21-28 + +--- + +## Key Decisions Log + +### 2026-02-18: Initial Planning +- **Decision**: Use VSCode + Claude Code 
approach (vs fully automated Agent SDK) +- **Rationale**: Provides visibility and control; can pivot to automation if needed +- **Impact**: Requires manual orchestration but allows quality validation throughout + +### 2026-02-18: Target Directory +- **Decision**: Output to `reference_versioned_docs/version-v4/` not `reference/` +- **Rationale**: Clean separation; `reference/` will be used for v5 kickstart later +- **Impact**: Additional step required later to copy to `reference/` for v5 + +### 2026-02-18: Transaction Logging Reorganization +- **Decision**: Move transaction/audit logging from `logging/` to `database/` +- **Rationale**: Transaction logging is a database-level concern, not application logging +- **Impact**: Better conceptual organization; `logging/` focuses on app/system logs + +### 2026-02-18: Link Placeholder Strategy +- **Decision**: Use `TODO:path` format in actual markdown links with per-section tracker files +- **Rationale**: Easy to find/replace, works with markdown parsers, no merge conflicts +- **Impact**: Separate cleanup phase needed to resolve placeholders + +### 2026-02-18: Complete Sections in Single PRs +- **Decision**: Don't split large sections (like Configuration) into multiple PRs +- **Rationale**: Easier to review section holistically; context is preserved +- **Impact**: Some PRs will be large but provide complete picture + +### 2026-02-18: Pilot-First Approach +- **Decision**: Run CLI and Security as pilots before scaling +- **Rationale**: Validate quality and process before committing to full migration +- **Impact**: Adds ~2-3 days upfront but reduces risk of rework + +--- + +## Known Issues & Blockers + +### Current Blockers +*None - ready to begin execution* + +### Potential Risks +1. **Version annotation accuracy** - AI might infer wrong introduction versions + - *Mitigation*: Confidence levels + human verification + release notes validation + +2. 
**Content quality variability** - Some sections might need significant editing + - *Mitigation*: Pilot sections first; refine prompts based on learnings + +3. **Review capacity** - Team might be overwhelmed by 20 large PRs + - *Mitigation*: Flexible timeline; can slow down review velocity as needed + +4. **Link placeholder confusion** - Placeholders might be unclear or incorrect + - *Mitigation*: Clear format specification; dedicated cleanup phase + +### Watch Items +- [ ] Current `reference/` and `reference_versioned_docs/version-v4/` directories are empty (confirmed reset) +- [ ] All planning documents are up to date +- [ ] Team has capacity for 2-3 PR reviews per day +- [ ] GitHub tracking issue needs to be created before execution + +--- + +## Upcoming Milestones + +### Next Steps (Immediate) +1. **Present to team** - Review all planning docs, get feedback and buy-in +2. **Environment setup** - Verify VSCode, Claude Code, gh CLI ready +3. **Create tracking issue** - Set up GitHub issue for progress tracking +4. **Run Pilot 1 (CLI)** - Execute first section, evaluate quality +5. **Team sync** - Review pilot results, refine approach + +### Near-Term Milestones (Next 2 Weeks) +- [ ] Pilot sections complete (CLI + Security) +- [ ] Decision on scaling approach (continue VSCode or build automation) +- [ ] Phase 1A complete (5 simple sections) +- [ ] Phase 1B started (medium complexity sections) + +### Medium-Term Milestones (Next 4 Weeks) +- [ ] All 20 sections have PRs merged +- [ ] Link resolution complete +- [ ] Cross-references updated +- [ ] Sidebars configured + +### Long-Term Milestones (Next 6 Weeks) +- [ ] Redirects configured +- [ ] Old versioned_docs removed +- [ ] Final validation complete +- [ ] Merged to main + +--- + +## For AI Agents: Quick Context + +**Project Goal**: Migrate v4 docs from `versioned_docs/version-4.X/` → `reference_versioned_docs/version-v4/` with restructuring. + +**Your Role**: Generate initial content drafts by: +1. 
Reading migration map entry for assigned section +2. Reading all source files listed (primary + additional) +3. Reading release notes for version info +4. Generating new files with inline source comments and version annotations +5. Creating link placeholders for cross-references +6. Creating branch, committing, opening PR + +**Key Constraints**: +- ✅ DO add files to `reference_versioned_docs/version-v4/` +- ✅ DO include inline source comments +- ✅ DO use `TODO:path` format for link placeholders +- ✅ DO note confidence levels on version annotations +- ❌ DON'T remove anything from `versioned_docs/` yet +- ❌ DON'T add files to `reference/` (that's for v5) +- ❌ DON'T guess at version dates without noting uncertainty + +**Key Files to Reference**: +- `v4-docs-migration-map.md` - Your primary instruction source (which files to read, where to write) +- `v4-docs-implementation-plan.md` - Detailed agent instructions (Part 1) +- `v4-docs-reference-plan.md` - Target structure and philosophy +- `release_notes/*.md` - For version annotation validation + +**PR Template**: See `v4-docs-implementation-plan.md` Part 1 for complete template. 
+ +**Success Criteria**: +- All files in correct location with proper structure +- Inline source comments on all content +- Version annotations with confidence levels +- Link placeholders in correct format +- Link placeholder tracker file created +- PR description complete and accurate + +--- + +## Team Assignments + +### Project Lead +- **Name**: Ethan +- **Responsibilities**: Overall coordination, decision making, pilot execution + +### Reviewers +*TBD after team discussion* + +### Execution Assignments +*To be determined after pilot phase* + +--- + +## Notes & Learnings + +### Planning Phase Insights +- Horizontal consolidation (v4.1→v4.7) + vertical reorganization (role-based→feature-based) are parallel transformations +- Starting with v4.7 as base and annotating backwards is more efficient than building forward from v4.1 +- Migration map revealed several complex merges (Configuration 59KB, Schema from 5+ files, Clustering 10+ files) +- Transaction/audit logging conceptually belongs with database, not application logging +- Current `reference/` folder was already partially reorganized (work in progress) + +### Process Improvements +*To be filled in as we learn from pilots and execution* + +### Template Refinements +*To be filled in as we refine prompts based on pilot results* + +### Common Issues +*To be filled in as patterns emerge during execution* + +--- + +## Change Log + +### 2026-02-18 - Project Initialization +- Created all planning documents +- Completed migration map (20 sections, ~100+ files mapped) +- Defined reference structure and philosophy +- Established execution procedure +- Ready for team review and pilot execution + +--- + +## Future Considerations + +### Post-Migration Tasks (Out of Scope for Now) +- Copy content from `reference_versioned_docs/version-v4/` to `reference/` to kickstart v5 +- Begin v5 documentation structure planning +- Consider automation for future minor version consolidations +- Evaluate if this approach works for v3 
historical docs + +### Process Improvements for Next Time +- Could build Agent SDK automation upfront if this approach proves successful +- Template-based content generation for consistent structure +- Automated version annotation extraction from git history +- Automated redirect generation from sitemap analysis + +### Documentation Enhancements +- Consider adding diagrams/flowcharts to planning docs +- Video walkthrough of the process for future team members +- Automated progress dashboard from migration map status fields + +--- + +## Quick Reference + +### Directory Structure +``` +documentation/ +├── versioned_docs/ +│ ├── version-4.1/ # Historical (source) +│ ├── version-4.2/ # Historical (source) +│ ├── version-4.3/ # Historical (source) +│ ├── version-4.4/ # Historical (source) +│ ├── version-4.5/ # Historical (source) +│ ├── version-4.6/ # Historical (source) +│ └── version-4.7/ # Latest (primary source) +├── reference_versioned_docs/ +│ └── version-v4/ # TARGET (new consolidated docs) +├── reference/ # Empty (for v5 later) +├── migration-context/ +│ └── link-placeholders/ # Per-section placeholder trackers +└── *.md # Planning documents +``` + +### Common Commands +```bash +# Switch to migration branch +git checkout major-version-reorg + +# Create placeholder tracker directory +mkdir -p migration-context/link-placeholders + +# Check current status +git status + +# Create new migration branch for section +git checkout -b migration/[section-name] + +# Open PR via gh CLI +gh pr create --base major-version-reorg --title "..." --body "..." + +# Check all migration map status +grep "Status:" v4-docs-migration-map.md +``` + +### Key Metrics to Track +- Sections complete: `X/20` +- PRs open: `X` +- PRs merged: `X` +- Link placeholders: `X created, Y resolved` +- Days elapsed: `X` +- Average review time: `X hours/PR` + +--- + +## Questions & Decisions Needed + +### Before Pilot +- [ ] Team reviewed all planning docs? +- [ ] Reviewers assigned for pilot sections? 
+- [ ] GitHub tracking issue created? +- [ ] Environment setup verified? + +### After Pilot +- [ ] Is content quality acceptable? +- [ ] Are version annotations accurate? +- [ ] Is link placeholder format working? +- [ ] Continue with VSCode or build automation? +- [ ] Any prompt refinements needed? + +### Before Scaling +- [ ] Pilot learnings documented? +- [ ] Prompts refined based on pilot? +- [ ] Review assignments made? +- [ ] Ready to open 15-18 more PRs? + +### Before Cleanup +- [ ] All sections merged? +- [ ] Ready to start link resolution? +- [ ] Any orphaned content to address? +- [ ] Ready to configure sidebars/redirects? + +### Before Merge to Main +- [ ] All cleanup phases complete? +- [ ] Documentation builds successfully? +- [ ] Redirects tested? +- [ ] Final validation complete? +- [ ] Ready to remove old versioned_docs? + +--- + +**Note**: This is a living document. Update status, metrics, decisions, and learnings as the project progresses. diff --git a/v4-docs-reference-plan.md b/v4-docs-reference-plan.md new file mode 100644 index 00000000..fd1eab9f --- /dev/null +++ b/v4-docs-reference-plan.md @@ -0,0 +1,362 @@ +# Harper v4 Reference Docs Plan + +## Summary + +This plan addresses two major transformations of the Harper v4 documentation: + +**Horizontal Consolidation**: Merging versioned documentation from v4.1 through v4.7 into a single comprehensive v4 reference, using inline version annotations (similar to Node.js docs) to track when features were added, changed, or deprecated across minor versions. + +**Vertical Reorganization**: Restructuring the documentation from role-based categories ("Developers," "Administration") to a flat, feature-based organization where Harper's core capabilities (CLI, Database, REST, MQTT, Components, Security, etc.) are prominently displayed as top-level sections. 
+ +### Key Strengths + +**Feature-First Organization**: Core features like Static Files, Environment Variables, and MQTT become immediately discoverable as top-level sections rather than buried in nested hierarchies. This mirrors how successful API documentation (Stripe, Node.js) is structured and reflects how Harper is actually built - around plugins and features. + +**Primary vs Secondary Reference Pattern**: Complex features that span multiple concerns (like Operations APIs used across different subsystems) have one exhaustive "primary" reference with other sections providing "quick reference" links. This solves discoverability without creating maintenance nightmares from duplication. + +**Non-Collapsible Navigation**: Following the Learn section's pattern, all reference sections are visible immediately in the sidebar using `overview.md` files instead of hidden index pages. Users can visually scan the full feature list without clicking to expand nested sections. + +**Pragmatic Legacy Handling**: Deprecated features (SQL, Custom Functions, Cloud) are moved to a dedicated `legacy/` section without extensive reorganization. This respects the evolution of Harper v4 while steering users toward current best practices. + +**Intelligent Redirect Strategy**: Using sitemap analysis, Google Analytics data, and AI assistance to ensure existing documentation URLs remain functional, prioritizing the most-visited pages for perfect redirects while handling the long tail with catch-alls. + +**Separation of Features from Concepts**: The plan distinguishes between standalone features (Components, Logging, REST) and cross-cutting concepts (caching, web applications) that are better documented as aspects of features or covered in Learn guides rather than separate reference sections. 
+ +This reorganization will significantly improve Harper documentation maintainability going forward, make v4 capabilities more discoverable to new users, and establish a solid foundation for v5 documentation. + +--- + +The primary goal of this reorganization is to highlight key features of Harper and then pack as much information into it as possible. Thus the primary reorganization point is to no longer arbitrarily sort pages by attributes or tags like "developers" or "administration" and instead flatten out the structure and present content based on the most relevant feature. + +We can generally follow a lot of what exists today as well as loosely use Harper's built-in feature list as a starting point. Any built-in plugin is really a core feature. Users shouldn't have to navigate through nested sections and pages to discover that Harper can support static file hosting. Static Files is a core feature and should be prominently displayed. More examples include: CLI, Configuration, Operations API, Security, Components, REST, Database, Resources, Environment Variables, (proper list in the outline below). + +There will of course be some overlap, but by organizing by top-level feature we can ideally capture the core information for that feature all in one place. Then other sections that may overlap can link to the core reference while still providing some high-level information. If we want to get really fancy we can use MDX components or even store doc information in JSON and reference it programmatically throughout the section to ensure consistency across references. + +For example, a lot of features have relative Operations APIs associated with them, such as `deploy_component`. The core `deploy_component` operation will be primarily documented in `reference/components/operations`. This will contain exhaustive information on the operation including all options, examples, edge cases, version history, etc. 
The same operation will also be listed on the `reference/operations-api/operations` page, but with only "necessary" or "quick" details such as description and options. This will then link out to the primary documentation reference for the operation in case users need more than a "quick" look. We'll utilize this pattern so that no matter how a user navigates the docs they should find what they are looking for. + +Now obviously this could create synchronization issues if someone forgets to update the docs in one place but not the other. This is why things should only have one "primary" reference and be linked to from other "secondary" or "quick" references. + +## Difference between a feature and concept + +While we often advertise "caching" as one of Harper's key features, the reality is caching is a concept or aspect of other core features. Caching on Harper makes no sense to someone unfamiliar with our Resource API. So instead of having a dedicated top-level "Caching" section in the reference, we should concentrate on documenting the parts of the resource API and schema system that enable caching. Then accompany this with appropriate Learn guides that do focus on implementing certain caching examples. + +Similarly, web applications are a feature of a variety of built-in and custom plugins. + +## Deprecated/Legacy Content + +There have been many changes over the course of v4, and many more to come in v5. Nonetheless, since we are collapsing documentation into major versions, we need to do something with content that is only relevant to a previous minor. Keep in mind that as long as we follow strict semver for our versioning, then we'll never have to deal with documenting a _removed_ feature in any singular major. I'm not necessarily going to solve for that circumstance because it really shouldn't happen.
+ +However, we have historically deprecated or discouraged use of certain features over the course of a major version (custom functions, sql) while still maintaining support. We need a place to document these features less prominently than active features so that we can continue to direct users in the right direction. + +I believe this may be the only circumstance to make an exception to the general feature-based organization strategy (sorta). These legacy / deprecated / discouraged features should be nested within a top-level "Legacy" section. They can still be nested sections themselves, and potentially this is the one place we'd break the no-collapsing rule. The existing `/docs/reference/sql-guide` and `/docs/4.1/custom-functions/*` sections contain many pages. We really do not need to waste time rewriting or organizing this information. The simplest solution is to just take what exists and move it to a new `/reference/legacy/` sub-path. + +## Index.md vs Overview.md + +One issue that has made our documentation confusing is the deep nesting of content. Users have to click many times to expand all the side-nav sections to potentially find the title they are looking for. + +Furthermore, a lot of nested sections have index pages that may or may not contain important information. An index page is the page you see when clicking on a nested section title. It's not always clear that these nested section titles are even clickable. + +As the Learn section demonstrates, section titles should be non-collapsible and not clickable. However, reference docs generally benefit from some sort of an overview section for general information and whatnot. As a result we have a choice: continue to use index pages (and iterate on making them more intuitively discoverable) or switch to an `overview.md` file that always exists at the top of any reference section. + +This doc map assumes that we'd match the non-collapsible section headers like the Learn section has.
+All sections and the docs within would be visible immediately. + +This means no "index" pages as sometimes users don't realize they exist along with the nested content. +What would traditionally be an index page should now go into `overview.md`. + +We may experiment with the index page pattern and compare/contrast, but I believe (based on the style and experience of the learn section), that this structure is least confusing. + +## Scope and Flexibility of the Outline + +The reference section outline below represents our best understanding of Harper v4's feature landscape based on documentation analysis from v4.1 through v4.7. However, it's important to note that this map intentionally walks a line between completeness and manageability. + +**This is a living guide, not a rigid specification.** As implementation progresses, we expect to: + +- **Discover additional pages or subsections** that make sense to add as we work through actual content migration +- **Consolidate pages** that turn out to have less content than anticipated +- **Split pages** that become unwieldy into multiple focused documents +- **Adjust organization** based on cross-referencing patterns that emerge during writing + +**The map intentionally avoids overwhelming detail** in some areas. For example, MQTT configuration and security features (like mTLS) are noted but not broken into extensive subsections, even though they might warrant dedicated pages during implementation. Similarly, some features with significant cross-cutting concerns (security, configuration) are kept streamlined in the outline but will naturally expand to reference related sections throughout the docs. + +**Feature-specific configuration and operations pages may fluctuate.** While some features clearly need dedicated configuration pages (like `logging/configuration.md`), others might fold configuration details into their overview or have configuration sufficiently covered in the central `configuration/options.md` page.
These decisions will become clearer as we write the actual content. + +The goal is to provide enough structure to guide implementation while remaining flexible enough to adapt to what we learn along the way. + +## Version Annotations Strategy + +Since we're consolidating v4.1 through v4.7 into a unified v4 reference, we need a consistent way to annotate when features were introduced, changed, or deprecated across minor versions. This follows the Node.js documentation pattern of inline version history. + +### Annotation Patterns + +**For new features:** +```markdown +## Relationships + +Added in: v4.3.0 + +The `@relation` directive allows you to define relationships between tables... +``` + +**For changed features:** +```markdown +### Auto-increment Primary Keys + +Changed in: v4.4.0 + +Primary keys can now auto-increment when defined as `Any`, `Int`, or `Long` types. +In previous versions, only GUIDs were supported for `ID` and `String` types. +``` + +**For deprecated features:** +```markdown +## SQL Querying + +Deprecated in: v4.2.0 (moved to legacy in v4.7+) + +SQL querying is still supported but discouraged. Consider using the REST API +or custom resources for querying data. See [Database](../database/overview.md) +for modern alternatives. +``` + +**For configuration options:** +```markdown +## Logging Configuration + +### `logger.level` +- Type: `string` +- Default: `"info"` +- Added in: v4.1.0 + +### `logger.per_component` +- Type: `object` +- Default: `{}` +- Added in: v4.6.0 + +Allows granular logging configuration per component or plugin. 
+``` + +### Annotation Guidelines + +- Use simple text annotations for now (no YAML frontmatter) +- Place version info prominently at the start of sections +- For minor changes within a feature, inline the version info with the specific detail +- Always indicate both when something was added AND when it changed significantly +- For deprecated features, provide guidance on modern alternatives +- When documenting operations APIs or configuration, include version info in tables/lists +- Focus on minor version (v4.3.0) unless a patch introduced the feature, then include patch (v4.3.2) + +### Building Version History + +When migrating content: +1. Start with v4.7 documentation as the base (most current) +2. Compare with earlier versions (v4.6 → v4.5 → ... → v4.1) to identify when features appeared +3. Use release notes to validate feature introduction versions +4. Use git diff between version folders to catch subtle changes +5. Annotate as you build rather than trying to add annotations retroactively + +This approach ensures we preserve the evolution of Harper v4 while maintaining a single, coherent reference that serves users across all v4 minor versions. + +## Reference Section Outline + +``` +reference/ +├── cli/ +│ ├── overview.md # High-level overview of the Harper CLI. +│ │ # Include details such as general args, auth, +│ │ # and provide a list of all available commands +│ │ # with links to their appropriate detailed section +│ │ # (in the other pages). +│ │ +│ ├── commands.md # Detailed reference for each (non-operations api) CLI +│ │ # command including light examples. Remember to link to +│ │ # Learn section guides for more in-depth examples. +│ │ +│ ├── operations-api-commands.md # Detailed reference for each Operations API CLI command. +│ │ # Even if it may seem repetitive (with the actual respective +│ │ # operations api section), each command should clearly detail +│ │ # itself including description and available arguments. 
+│ │ +│ └── authentication.md # (Optional) Specific reference for CLI authentication +│ +├── configuration/ +│ ├── overview.md # High-level overview of Harper configuration, such as +│ │ # the `harper-config.yaml` file, configuration mechanisms, +│ │ # and maybe some architecture notes such as how some core +│ │ # features will require restarts, but other changes wont. +│ │ +│ ├── options.md # List/table of all options. include brief descriptions and +│ │ # any necessary info like data types and defaults. +│ │ # Keep in mind that features will contain their own config +│ │ # reference doc, and so this section should link out to the +│ │ # relative detailed docs. +│ │ +│ └── operations.md # List/table of all operations related to managing configuration +│ # in detail. +│ +├── operations-api/ +│ ├── overview.md # High-level info on operations api including basics like request +│ │ # shape and bonus features like health and open api endpoints. +│ │ # Should include authentication info, and link to the specific +│ │ # security pages for more details. +│ │ +│ └── operations.md # A complete simplified list of all operations that links out to +│ # specific sections for more details beyond short description and +│ # option data types. +│ +├── security/ +│ ├── overview.md # Deserves its own section since security is cross-feature and it +│ │ # can encompass pages on the specific security related operations +│ │ # and plugins like `tls`, JWT, and cert management. +│ │ # Many other sections will link to here when mentioning auth. +│ │ # The existing security section does a really excellent job of +│ │ # organization information. +│ │ +│ ├── basic-authentication.md # Basic auth mechanism details +│ │ +│ ├── jwt-authentication.md # JWT auth mechanism details +│ │ +│ ├── mtls-authentication.md # mTLS auth mechanism details +│ │ +│ ├── certificate-management.md # Certificate management details +│ │ +│ ├── certificate-verification.md # Certificate verification (OCSP, etc.) 
+│ │ +│ ├── cors.md # CORS configuration and usage +│ │ +│ ├── ssl.md # SSL/TLS configuration +│ │ +│ └── users-and-roles.md # User and role management including `roles` plugin +│ +├── components/ +│ ├── overview.md # What are components? Evolution from custom functions to +│ │ # components to applications/extensions to plugins. +│ │ +│ ├── applications.md # Application component details and API +│ │ +│ ├── extension-api.md # Extension API reference +│ │ +│ └── plugin-api.md # Plugin API reference +│ +├── database/ +│ ├── overview.md # Explain how Harper's data system is powered by Resources, but you don't +│ │ # necessarily have to build custom resources to utilize the database system. +│ │ # Detail how a lot is achievable using the schema system and auto rest api. +│ │ +│ ├── schema.md # `graphqlSchema` plugin and the schema system. Including detailed api info +│ │ # on the available directives and data types for schemas. likely a long page. +│ │ # Can optionally break some parts out into their own pages like "blobs" and +│ │ # "vector" as exists today. +│ │ +│ ├── data-loader.md # `dataLoader` plugin reference +│ │ +│ ├── storage-algorithm.md # Storage algorithm details +│ │ +│ ├── jobs.md # Bulk data and jobs operations +│ │ +│ ├── system-tables.md # Harper system tables for variety of features +│ │ +│ ├── compaction.md # Storage compaction and compression details +│ │ +│ └── transaction.md # Transaction logging details +│ +├── resources/ +│ ├── overview.md # Split off from previous "data/" section since resources are generally for +│ │ # custom implementations. The previous section is all schema and data stuff. +│ │ # This one is all about building custom resources including the jsResource +│ │ # plugin and global apis. Likely easiest to doc the plugin in this page and +│ │ # use other pages for the api reference. 
+│ │ +│ ├── resource-api.md # Currently the resource api is split into two separate reference files that +│ │ # are very similar but with the `loadAsInstance` thing have different signatures. +│ │ # Easiest to stick to that model until we can simplify in future majors. +│ │ +│ ├── global-apis.md # `tables`, `databases`, `transactions` etc. +│ │ # `server` has its own section so mention and link. +│ │ +│ └── query-optimization.md # Query optimization details and best practices +│ +├── environment-variables/ +│ ├── overview.md # `loadEnv` plugin overview and usage +│ │ +│ └── configuration.md # Environment variable configuration options +│ +├── static-files/ +│ ├── overview.md # `static` plugin overview and usage +│ │ +│ └── configuration.md # Static file serving configuration options +│ +├── http/ +│ ├── overview.md # HTTP server overview and architecture +│ │ +│ ├── configuration.md # `http` configuration options +│ │ +│ └── api.md # `server` global API reference +│ +├── rest/ +│ ├── overview.md # `rest` plugin and the overall system as it interacts +│ │ # with things like schemas and custom resources. +│ │ +│ ├── querying.md # REST querying syntax and capabilities +│ │ +│ ├── headers.md # HTTP headers used by REST API +│ │ +│ ├── content-types.md # Supported content types (JSON, CBOR, MsgPack, CSV) +│ │ +│ ├── websockets.md # WebSocket support via REST plugin +│ │ +│ └── server-sent-events.md # Server-Sent Events (SSE) support +│ +├── mqtt/ +│ ├── overview.md # MQTT plugin overview, configuration, and usage +│ │ +│ └── configuration.md # MQTT-specific configuration options +│ +├── logging/ +│ ├── overview.md # Logging system overview and architecture +│ │ +│ ├── configuration.md # Logging configuration options (per-component, granular, etc.) 
+│ │ +│ ├── api.md # Logger global API reference +│ │ +│ └── operations.md # Logging-related operations API +│ +├── analytics/ +│ ├── overview.md # Analytics system overview (resource/storage analytics, system tables) +│ │ +│ └── operations.md # Analytics-related operations +│ +├── replication/ +│ ├── overview.md # Replication system overview (native replication, Plexus) +│ │ +│ ├── clustering.md # Clustering configuration and management +│ │ +│ └── sharding.md # Sharding configuration and strategies +│ +├── graphql-querying/ +│ └── overview.md # GraphQL querying feature (experimental/incomplete) +│ +├── studio/ +│ └── overview.md # Studio documentation (still ships with v4 but moving to legacy) +│ +└── legacy/ + ├── cloud/ # Legacy cloud documentation (replaced by Fabric) + │ + ├── custom-functions/ # Custom functions (deprecated in favor of components) + │ + ├── sql/ # SQL guide (discouraged) + │ + └── fastify-routes/ # Fastify routes (discouraged) +``` + +## Redirects + +One major concern with modifying the `/docs/` path is we've used this for many years for our documentation content. It is safe to assume that many backlinks to these pages exist across the internet. From our own content, to external posts written by community members. Thus, we must have a detailed plan for supporting these paths as we migrate to a new structure. + +We can start by analyzing the docusaurus generated sitemap for all existing paths today. Then, using Google Analytics data for paths visited, we can find out what paths have been navigated to since we enabled analytics in October 2025. And finally, we can look to the existing redirects.ts file to understand what redirects have been created so far. + +With a little help from AI, we can use these inputs to create a historical site map of paths we must ensure are redirected. 
With the analytics data, we can even understand exactly which paths are visited most frequently and, depending on the volume, focus our efforts on redirecting the top N% of pages versus creating perfect redirects for all of the roughly one thousand pages. + +That said, AI is also _pretty_ good at automating a redirect map too. + +There is really a plethora of solutions here ranging from least to most effort (and, correspondingly, user experience). All paths must have a redirect, but effort is generally determined by how many of them will have "perfect redirects" to the most relevant content versus being included in a catch-all and how easy it will be to maintain the redirects over time. And furthermore, many of our pages are duplicated across v4.2 to v4.7. If we assume we can safely redirect all of those duplicate pages, then the problem set significantly reduces. diff --git a/v4-docs-research.md b/v4-docs-research.md new file mode 100644 index 00000000..40199a26 --- /dev/null +++ b/v4-docs-research.md @@ -0,0 +1,326 @@ +# v4 Docs Map +  +The point of this is to figure out the documented evolution of Harper features and subsystems from v4.1 to v4.7 in order to create a holistic solution for reorganized singular major v4 versioned docs. + +This is aligned with renaming `docs/` to `reference/` and furthering the ideal separation of prose content into `learn/`. + +The structure I'm hoping to work towards is a single `reference/` folder layout that ideally has any _active_ features top-level. Then if there are legacy, deprecated, or even removed features (from a latest version), they will be documented in some sub-path such as `reference/legacy/` or something like that. + +When a feature has changed over time and some part of it is still active, but some other aspect has been deprecated; that feature should still live at top-level, but then that specific detail will be indicated as legacy/deprecated/removed.
+ +Since we are operating within a single major version; there realistically shouldn't have been any "removed" features. + +This information will also be extremely useful for guiding us on feature scope for v5 and beyond. + +My plan is to go through versioned docs folders one by one and try to logically piece together a map. I've given AI a couple runs at this. Including evaluating release-notes and everything else. Unfortunately, it doesn't do a great job at creating the correct timeline. Furthermore, it gets awfully confused by things like the evolution of custom functions to components to applications/extensions and finally plugins. So while I'm sure with enough context and better prompting an AI could figure it out, I believe this will be easier to complete with a human touch; especially since I'm quite familiar with the Harper feature set already. + +## v4.1 + +Top Level sections include: +- Installation + - Replaced by both newer installation instructions and learn guides +- Getting Started + - Replaced entirely by learn guides +- External API docs (api.harperdb.io) which has since been redirected to operations api +- Studio + - We'll keep this around for now since it is still shipped with v4 +- Cloud + - No longer need to support these pages; cloud has been fully replaced by fabric +- Security + - Lots of these pages still exist today but have been updated. + - In general I don't think there is anything in here that would be version specific; other than the larger concepts. + - This section has "JWT" and Certificate Management. In later Harper versions we've likely added to those and so we'll detail that version specificity in the respective pages. + - Like there should be something that dictates that JWT support has existed as early as v4.1 + - But say some other scheme (Cookies?) didn't work until whatever minor version +- Clustering + - Nested + - More detailed config info for `clustering:` section.
+ - includes some high level info for the feature + - includes some ops apis +- Custom Functions + - Nested + - Very operations-api based + - Also includes some ops apis like `restart_service` for reloading custom functions (we have component apis analogous to this today) + - I think this section highlights how we'll need some sort of "legacy" ops api page or if things have been renamed and updated we need to ensure its detailed that something like `add_custom_function_project` has become _whatever_ in todays ops api +- Add-ons and SDKs + - I believe we've deleted this page in latest versions; i think all of this external stuff is out of date today and wouldn't necessarily work even if we had a v4.1 user so we can keep it removed +- SQL Guide + - nested pages of SQL info +- CLI + - functional reference page; details many commands that still exist today + - we'll need to do a special detailed analysis of command additions/changes over time as we merge this content +- Configuration + - Very similar to the configuration page we have to day split up by sections + - we'll need to do a special detailed analysis of this page as we merge versions to ensure we correctly document the version history of option additions/changes + - Introduces the naming conventions (snake_case) we still use today +- Logging + - High-level explanation of structure logger + - No JS api info + - References to config (but links to configuration page) +- Transaction Logging + - (Operations API) + - Moved to `` in the future +- Audit Logging + - (Operations API) + - Moved to `` in the future +- Async Jobs + - (Operation API) + - This is moved to Operations API / Jobs in the future + - Looks almost identical; will need to do exact argument analysis later when reviewing operations API info +- Upgrade + - At this point in the doc history this page simply details how to update the harper version globally installed via nvm or npm or whatever package manager + - It also details that harper automatically 
handles upgrades by just running the `harperdb` command +- Reference + - Content Types + - JSON, CBOR, MsgPack, CSV + - All of these still exist today; and there is a standalone content type page + - Data Types + - This becomes schema docs in the future + - Dynamic Schema + - This becomes schema docs in the future + - Headers + - `server-timing`, `hdb-response-time`, `content-type` + - Limits + - Schema Naming restrictions + - Table limitation (attribute maximum) + - Storage Algorithm +- Support + - This doesn't need its own page anymore. We include links to things like discord and support email in multiple places throughout docs site + +## v4.2 + +First time pages have been nested into top-level sections Getting Started, Developers, Administration, Deployments, Reference. I think we absolutely want to get rid of these top level sections as they are just a bit confusing for reference docs. Its just more decisions a new user has to figure out on their own. When instead the left sidebar should just list as many top-level topics as it reasonably can so users can visually scan. Of course not everything has to be top-level. + +- Getting Started + - Completely replaced by Learn +- Developers + - Familiar structure to todays docs containing: + - Applications + - Guide like that has been / will be replaced by Learn content + - Subsections: + - Caching + - This is a key feature + - Debugging + - This isn't necessarily a reference page; replaceable by Learn guide and cross links from configuration page (thread.debug) to Learn guide focussing on debugging + - Fastify Routes + - Should become a reference page for Fastify plugin + - Schemas + - Should become a reference page for all details of schema system + - Also likely accompanied by detailed usage implementation guide in Learn + - Examples + - Marketing wants to have a page like this likely in Learn to start + - Components + - Oof! This is a confusing section; and I remember fixing this in later docs versions. 
+ - So this subsection details the concept of Components; related them to "Applications" and "Extensions" too but also encapsulates pages for things like "Drivers", "Google Data Studio", "SDKs", etc. + - This has its own Operations page and Installation page too + - This obviously will continue to have its own top-level section which will properly encapsulate applications, plugins, etc. (in-fact we already have the start of this in docs/reference/components now so we'll build off of that) + - Operations API + - First time having its own standalone section containing sub pages for all operations api types + - Likely want to retain something like this and ensure this is the single source of reference for all operations apis. feature pages should link to this directly + - Real-Time + - This page still exists today in a similar fashion + - Need to consider making this nested i think and having dedicated pages for MQTT, WS, etc. + - Similar to ongoing idea below, likely want to have detailed ops/config info for any of these core features in their own reference section that parallels and links to/from other pages like a general overview pages. Akin to the general config or ops api page ideas, we could have another one for Real-Time that succinctly details the subsystems available, but then links out to reference and learn content depending what user wants. + - nonetheless things like mqtt is a standalone plugin; document it as such + - but something like ws isn't exactly; its a feature of REST so ensure its appropriately organized by the plugin and well referenced for other sensible locations. + - In this regard we may not need a top-level "Real Time" page. These specific features MQTT, WS, do deserve detailed reference pages and sections, but we don't have to arbitrarily group them like this. + - REST + - should remain top level but is truly a built-in plugin. 
can be structured like other plugin docs + - may need to think through how to incorporate all the configuration and subfeatures of this. like ws and interconnectedness with Resource API and things like content types. this goes back to the organization of information problem that this could live under an umbrella like "HTTP" or "Networking", but is there value in having higherlevel pages or can we just list this top-level along with everything else + - Security + - This might exist in v4.1 but aligned with some of the current thinking, this section has a "Configuration" page ... is this more like what we want out of dedicated sections for features and then having detailed subsections for similar parts? + - Instead of having a whole `security/configuration` page, I believe this could live in a root, or the relative configurations should go into a more specific topic. like `security/cors` and that can contain general reference as well as specific configuration info + - Otherwise, seeing some trend of existing feature scope here like Basic auth, Cert mgmt, JWT, and Users & Roles + - So just like other places; we likely don't need to lump these all into a "Security" subsection and they could just have their own top-level reference topic. +- Administration + - Best Practices + - This info should be migrated to a learn guide + - Clone Node + - A lot of configuration info; likely need to see how this maps to overall configuration changes over versions + - Needs a learn guide for sure but also some reference for the relative configuration options or ops apis + - Studio + - This was moved around from old version and still persists today + - Jobs + - same as v4.1 page; should just exist top level or be completely folded into operations api + - Logging + - nested all three "Audit", "Standard", and "Transaction" + - again, why nest? and furthermore, most of these pages are just operations reference. 
+- Deployments + - Configuration File + - Good start to an important reference page. as i've written else where, I likely want to have a configuration page be more general and then list out all options but link out to specific pages for detailed description and usage patterns. + - CLI + - similar as before; good reference and could use more detail and structure + - Cloud + - remove! replaced by fabric + - Install + - this is a learn guide now; any other info should be included else where like configuration page (in a subsection about say necessary installation variables or the like) + - The "On Linux" subpage should be a learn guide if its even still relevant. + - Upgrade + - likely can be removed or more simply retained. not as much upgrade info today. + - if there is actually some sort of api feature then it can documented in reference. but its just behavior of installation or something then absolutely simplify +- Reference + - Many of the following subsections can exist as is; this is the basis for what we want this whole `/docs` section to become. + - Analytics + - this is just a table; theres a few of these "system" tables that we could detail somewhere more technically + - Architecture + - high level info that would fit better in an earlier page or in something like applications + - new learn content already has this info in it. + - v4.2 contains a relatively simple and still relevant ascii diagram we could bring back! + - Clustering + - generally just keep this as is + - this is actually a good example of the ops api pattern I want other subsystems to align with. All the nitty gritty detail is in here including ops apis and such. Any other pages with this info are light and should generally link to this. + - Content Types + - Same as before; hasn't changed much. + - notes this is specific to operations api + - how does this apply to rest endpoints and custom resources exactly? + - what about adding additional content types? 
(or is that a later version feature) + - Data Types + - same as before; should be folded into a schema reference + - Dynamic Schema + - as early as v4.2 we have this information disorganization where the user needs to read multiple different pages to even understand what the schema system is made of. if they missed the "defining schema" guide early on then this page and the previous make little sense. + - schemas system needs a detailed reference page! + - Globals + - beginning of some js api reference docs that are important for extensions (at this time), but now applications and plugins + - Headers + - looks like we've already removed one of the headers previously defined in v4.1 + - Limits + - same page as before; very light on information. not sure how relevant it is today + - Resource Class + - need to take a close look at this reference page especially how its evolved over latest versions. its very detailed and complete enough but as we merge versions need to take special care about documenting appropriate versions where things were added or modified. + - SQL + - same as before; likely being moved to a "legacy" or "deprecated" section in latest docs + - Storage Algorithm + - useful technical info; where is this today? Could likely be apart of a larger "DB" section or something or just "Additional Technical Details" as it doesn't have too much relevant info for app or even plugin devs. + - Transactions + - is this another global api? + - need to see what the state of this is today and ensure its represented in appropriate places like globals page + - now maybe global page needs to be high level and we need separate pages for each api within it too? like logger could exist in logger of course. all the server stuff could exist in a Networking or simply "Server" part. + +## 4.3 + +In v4.3, the docs didn't change much. There are only a couple new files `administration/compact.md` and `developers/security/mtls-auth.md`. 
Within the `administration/harperdb-studio/` directory, a few files changed between the versions. + +The different file paths can be retrieved using: + +```javascript +let v42 = fs.readdirSync('versioned_docs/version-4.2', { recursive: true }); +let v43 = fs.readdirSync('versioned_docs/version-4.3', { recursive: true }); +let v42_set = new Set(v42); +let v43_set = new Set(v43); +// Files removed/renamed in v43 +v42_set.difference(v43_set); +// Set(4) { +// 'administration/harperdb-studio/instance-example-code.md', +// 'administration/harperdb-studio/manage-clustering.md', +// 'administration/harperdb-studio/manage-functions.md', +// 'administration/harperdb-studio/manage-schemas-browse-data.md' +// } +// Files created/renamed in v43 +v43_set.difference(v42_set); +// Set(5) { +// 'administration/compact.md', +// 'administration/harperdb-studio/manage-applications.md', +// 'administration/harperdb-studio/manage-databases-browse-data.md', +// 'administration/harperdb-studio/manage-replication.md', +// 'developers/security/mtls-auth.md' +// } +``` + +Looking at the 4.3.0 release notes, we see a number of new features: + +- Relationships and Joins with the `@relation` custom directive in schemas +- OpenAPI specification from ops api `GET /openapi` +- General query optimizations +- Indexing `null` values enabling querying by nulls `GET /Table/?attribute=null` +- CLI expanded to support certain ops apis +- BigInt support in schema system +- Studio upgrade +- MQTT upgrades such as mTLS support, single-level wildcards, CRDT, config changes, and more. +- Storage perf improvements with compaction and compression + +There may be other changes too; but since the file structure is mostly the same we can likely utilize git `diff` to determine any notable changes to things. + +## 4.4 + +A similar analysis comparing 4.4 to 4.3 shows that there are a number of new docs files. 
There was a bit of moving things around and renaming some things (like harperdb-cli.md to harper-cli.md) which causes a little confusion in the file history. But notably 4.4 was when we started adding distinct files for components (like built-in.md, managing.md, and reference.md); this was the first pass at really updating the definition for components overall. Furthermore, this version contains things like native replication and so some new pages exist for that. Finally, this was also about when we started creating things like Next.js support so files like `developers/applications/web-applications.md` were added. + +Unfortunately, it looks like in [#303 (Restructure developer onboarding)](https://github.com/HarperFast/documentation/blob/ade07fd9428b0321c047ac8243ad1106bb0de2a8/versioned_sidebars/version-4.4-sidebars.json) the `developers/` tab in the sidebar was removed and has gone unnoticed for ~4 months. + +The paths still exist, but are just missing from the sidebar navigation. + +The 4.4.0 release note outlines all new features for this minor: + +- Native Replication (codename Plexus) which uses direct WS connections +- Replication sharding as part of the new system +- Replicated operations and rolling restarts for clustered nodes +- Computed Properties for schema system +- Custom indexing using computed properties +- Native GraphQL querying support (experimental; provisional; incomplete) +- Dynamic certificate management +- Custom resource methods can now return `Response` objects (or a Response-like object) +- Auto-increment primary keys when defined as type `Any`, `Int`, or `Long`. `ID` and `String` continue to use GUIDs. +- Installation now includes dev vs prod defaults +- Exported resources can now be configured to be specifically exported by a certain protocol (REST, MQTT, etc.)
for more granular control over what is exported where + +## 4.5 + +There is really only one new file in v4.5, `reference/blob.md`, but the list of features is longer than before: + +- Blob storage +- password hashing upgrade (sha256, argon2id) +- resource and storage analytics +- Default replication port was changed from 9925 to 9933 +- Expanded property access even if they aren't defined in a schema +- Storage reclamation (more of a platform feature than any kind of api) +- Expanded sharding functionality +- Certificate revocation in clustering +- Built-in `loadEnv` plugin for environment variable loading +- `cluster_status` operation updates +- Improved URL path parsing for resources +- `server.authenticateUser` API +- HTTP/2 support for API endpoints (`http2` option) +- transactions can now be reused after calling `transaction.commit()` +- GraphQL query endpoint can be configured to listen on different ports; it's also now disabled by default to avoid conflicts +- Global file handling improvements for components +- `Table.getRecordCount()` api +- Removed record counts from REST API + +## 4.6 + +There are more file changes in v4.6 docs; this is when I added numerous files for components and moved them from `developers/` to `reference/`. + +This is also when more resource reference pages were added. + +In addition to that, new features include: + +- Vector indexing: HNSW alg for tables +- Improvements to Extension system +- Plugin API!
+- Logging improvements (granular configuration, per plugin/app configuration) +- Data Loader built-in plugin +- resource API changes (loadAsInstance and whatnot) +- Fixed `only-if-cached` behavior to not make a background request + +## 4.7 + +Only one new file; `'developers/security/certificate-verification.md'` + +Feature list much smaller: +- individual component status monitoring +- OCSP support +- new analytics and licensing functionality (for Fabric) +- Plugin API changes + +## Migration Ideas + +From early on (v4.1) many features were fully controlled by ops apis. And at first they were presented based on the feature at hand. Like "Clustering", "Custom Functions", etc. and within the docs for that feature it included whatever relevant ops apis were needed. This makes me think that while we should have a technical reference for _all_ operations apis, it may be valuable to also associate specific ops apis with their relative feature. Like how is a user supposed to know if they want to do _clustering_ that they need to first look at "ops apis"? Having a top level "Clustering" is valuable. That said; this is in part what the Learn section is meant to solve. Users should learn about how to do clustering via Learn guides. And then they can click through to reference pages for any other information. We also have Search in order to discover whatever specific ops apis. I think organizing the ops apis under an "Operations APIs" section is still correct but we should ensure discoverability. Maybe we don't nest it and just have them all viewable by default as soon as someone is looking at the left sidebar in Reference. + +Just from reviewing v4.1 docs it is starting to show ideal core systems to document such as CLI, Operations API, Configuration, Schemas, Logging. Like the previous paragraph stated, some thought needs to be given to how information is organized. Logger is a great example of having configuration details, usage details, and API reference details.
So should all of that exist under "Logging" or should it be spread out between sections? I think the reality is we'll need a bit of "both". Where there should be top-level sections "Configuration" and "Logging". Under configuration, it should have the general info about the config file and snake_case mapping to CLI options or operations API values, and it should list out all available configuration properties in a structured way (think JSON schema). Include short descriptions, but for any actual detail around say the `logger` section, it should link out to the Logging section for further information. Like expanded descriptions for example. Additionally, any "guide" or usage-like info should be delegated to learn guides. But with this thinking; how should operations apis be documented? + +Should we simplify Ops Api section to include general ops api info (requests, endpoints, w/e), and then have a table/list of available (and deprecated) ops apis with short descriptions and then links out to other docs (related to the respective feature) that details the op? + +Could we introduce some form of a "tag" system for pages? This could help with information organization as we could get rid of top-level pages like "Real-Time" or "Security" and just tag relevant sections based on some of those top-level topics. We could incorporate these tags into search or even some sort of navigation mechanism. This may be more satisfactory of a compromise for self-navigation. It's simpler than trying to come up with overly organized top-level sections, and is better than search (though AI search would definitely trump this). I think a fundamental issue is that users still are hesitant to use search since it's traditionally such a poor experience. Now with AI baked in it's improved tremendously but still users aren't gravitating towards it. Many are simply used to self-navigating and so we need to find some compromise.
Going back to concept of "tags", idk if that necessarily solves that problem unless we introduce a more interactive search page. I think i'd rather just ensure that searching `"networking"` will actually return pages like HTTP, REST, MQTT, w/e. + +As I make my way through later v4 minors (4.3, 4.4, 4.5) its starting to show how the docs structure from as early as 4.2 doesn't change all too much. If I can sufficiently map out the top-level features to document, then come up with a reasonable format/structure for pages (like how to actually detail changes over versions), we should be in a really good place. Overall we'll significantly simplify the reference docs and make it much easier to maintain going into v5. We'll meet our obligation to provide "support" for existing v4 minors since we'll have changes documented. We've done an excellent job not breaking any apis over the development of v4 so in theory there shouldn't be much concern if say a v4.5 user was reading v4 docs which are more representative of latest v4.7 information but also contain notes about how things had changed for any particular part from v4.5 to v4.6 and beyond. + +The real challenge in all of this is to figure out the high-level organization of information. I've flip-flopped a bit between high-level general pages and how everything should be organized, but I think through a lot of this it seems apparent we should document individual plugins and features thus the docs will logically map to the implementation. There will obviously be some cross-cuts, but i think organizing by feature makes the most sense. 
\ No newline at end of file diff --git a/v4-feature-history-ai-gen.md b/v4-feature-history-ai-gen.md new file mode 100644 index 00000000..59e4c1d6 --- /dev/null +++ b/v4-feature-history-ai-gen.md @@ -0,0 +1,1692 @@ +# HarperDB v4 Feature Mapping & Version History +## Consolidation Reference Document + +**Purpose**: This document maps all features across HarperDB v4.1 through v4.7 to guide the consolidation of versioned documentation into a single unified directory. It tracks feature introductions, changes, deprecations, and removals. + +**Created**: 2026-02-05 +**Source Analysis**: versioned_docs (v4.1-v4.7) + release-notes/v4-tucker + +--- + +## Table of Contents +1. [Executive Summary](#executive-summary) +2. [Critical Deprecations & Removals](#critical-deprecations--removals) +3. [Major Feature Additions by Version](#major-feature-additions-by-version) +4. [Feature-by-Feature Version History](#feature-by-feature-version-history) +5. [Operations API Evolution](#operations-api-evolution) +6. [Documentation Structure Evolution](#documentation-structure-evolution) +7. 
[Consolidation Action Items](#consolidation-action-items) + +--- + +## Executive Summary + +### Major Architectural Changes + +**v4.2.0 (January 2024)** - THE PIVOTAL RELEASE +- Complete documentation restructuring from feature-based to role-based organization +- Introduction of Component Architecture (Applications + Extensions) to replace Custom Functions +- Resource API introduced as unified data access interface +- 11 top-level directories reduced to 5 organized categories + +**v4.3.0 (March 2024)** - "Tucker Release" +- Relationships and foreign key support +- Query optimization and BigInt support + +**v4.4.0 (October 2024)** +- Native replication system (Plexus) +- GraphQL support +- Sharding + +**v4.5.0 (March 2025)** +- Blob storage system +- Password hashing upgrades + +**v4.6.0 (June 2025)** +- Vector indexing (HNSW) +- Data loader component +- New Extension/Plugin API + +**v4.7.0 (October 2025)** +- Component status monitoring +- OCSP certificate support +- Formal deprecation of Custom Functions + +### Key Statistics + +| Version | Total Docs | Major Features Added | Deprecations | +|---------|-----------|---------------------|--------------| +| v4.1 | 92 files | (baseline) | - | +| v4.2 | 101 files | Component Architecture, Resource API | Custom Functions moved to Ops API | +| v4.3 | 101+ files | Relationships, BigInt, CRDT | - | +| v4.4 | 101+ files | Native Replication, GraphQL, Sharding | - | +| v4.5 | 114+ files | Blob Storage, HTTP/2 | - | +| v4.6 | 114+ files | Vector Indexing, Plugin API | - | +| v4.7 | 114+ files | Status Monitoring, OCSP | Custom Functions deprecated | + +--- + +## Critical Deprecations & Removals + +### 1.
Custom Functions → Component Architecture + +**Status**: DEPRECATED in v4.7, replaced by Applications + Extensions + Plugins + +#### Version Timeline +- **v4.1**: Featured as top-level `custom-functions/` directory (12 files) +- **v4.2**: Moved to `developers/operations-api/custom-functions.md` (consolidated) +- **v4.2**: Component Architecture introduced as replacement +- **v4.7**: Marked with `:::warning Deprecated` banner + +#### Files Affected (v4.1) +``` +custom-functions/ +├── create-project.md +├── custom-functions-operations.md +├── debugging-custom-function.md +├── define-helpers.md +├── define-routes.md +├── example-projects.md +├── host-static.md +├── requirements-definitions.md +├── templates.md +└── using-npm-git.md +``` + +#### Migration Path +- **Custom Functions** → **Applications** (for HTTP routes and APIs) +- **Custom Functions** → **Extensions** (for background services) +- **Custom Functions** → **Plugins** (for system integrations, v4.6+) + +#### Consolidation Action +- ✅ Retain documentation under "Legacy/Deprecated" section +- ✅ Add prominent deprecation warning +- ✅ Cross-reference to Applications/Extensions/Plugins documentation +- ✅ Include migration guide + +--- + +### 2. Deprecated NoSQL Operation Parameters + +**Status**: DEPRECATED in v4.2+, alternatives provided + +#### Changed Parameters +| Deprecated Parameter | Replacement | Version Introduced | File | +|---------------------|-------------|-------------------|------| +| `search_attribute` | `attribute` | v4.2 | nosql-operations.md | +| `search_value` | `value` | v4.2 | nosql-operations.md | +| `search_type` | `comparator` | v4.2 | nosql-operations.md | + +#### Consolidation Action +- ✅ Document both old and new parameters +- ✅ Mark deprecated parameters with version labels +- ✅ Show equivalent examples using both syntaxes +- ✅ Add "Version History" section to nosql-operations documentation + +--- + +### 3. 
HarperDB Studio → Harper Studio + +**Status**: RENAMED in v4.2+ + +#### Version Timeline +- **v4.1**: `harperdb-studio/` (top-level) +- **v4.2**: `administration/harperdb-studio/` +- **v4.7**: `administration/harper-studio/` + +#### Documentation Changes +**Removed Files** (tied to Custom Functions): +- `manage-functions.md` (replaced by `manage-applications.md`) +- `manage-charts.md` + +**Added Files** (new features): +- `manage-applications.md` (v4.2+) +- `manage-replication.md` (v4.4+) + +#### Consolidation Action +- ✅ Use "Harper Studio" as primary name +- ✅ Add redirect/note mentioning previous "HarperDB Studio" name +- ✅ Merge manage-functions content into historical section + +--- + +## Major Feature Additions by Version + +### v4.2.0 (January 2024) - MAJOR RELEASE + +#### New Architecture +- **Component Architecture** - Applications, Extensions framework + - Files: `developers/applications/` (6 files), `developers/components/` (7 files) + +#### New APIs +- **Resource API** - Unified data access interface + - Files: `developers/resources/` directory +- **REST Interface** - RESTful data access + - Files: `developers/rest-interface.md` + +#### New Features +- **Real-Time Messaging** - MQTT, WebSockets, Server-Sent Events + - Files: `developers/real-time-messaging.md` +- **Configurable Database Schemas** - GraphQL schema syntax + - Files: `developers/applications/defining-schemas.md` +- **Clone Node Operation** - Database cloning + - Files: `developers/operations-api/clustering.md` + +--- + +### v4.3.0 (March 2024) - "Tucker Release" + +#### Data Model Enhancements +- **Relationships and Joins** - Foreign keys, many-to-one, one-to-many + - Files: Added to resource API documentation +- **Indexing Nulls** - Null value indexing support +- **BigInt Support** - Large integers (up to 1000 bits) + - Files: `reference/data-types.md` updated +- **CRDT Support** - Conflict-free replicated data types + - Files: Added to resource API documentation + +#### Developer Tools 
+- **OpenAPI Specification** - `/openapi` endpoint + - Files: `developers/operations-api/` updated +- **CLI Expansion** - Operations API commands from CLI + - Files: `deployments/harperdb-cli.md` updated +- **Query Optimizations** - Improved query planning + - Files: `reference/resources/query-optimization.md` (added in later version) + +#### Operations +- **Database Compaction** - `compact_database` operation + - Files: `developers/operations-api/system-operations.md` + +--- + +### v4.4.0 (October 2024) + +#### Clustering & Distribution +- **Native Replication (Plexus)** - New replication system via WebSocket + - Files: `developers/replication/` directory (NEW) + - `index.md` + - `configuration.md` + - `monitoring.md` + - `troubleshooting.md` +- **Sharding Support** - Data distribution across nodes + - Files: Integrated into replication documentation + +#### Data Model +- **Computed Properties** - Dynamic calculated properties + - Files: `reference/` updated +- **Custom Indexing** - Composite and full-text indexing via computed properties + - Files: `reference/` updated +- **Auto-incrementing Primary Keys** - Automatic numeric key generation + - Files: `reference/` updated + +#### APIs +- **GraphQL Support** - Native GraphQL querying + - Files: `reference/graphql.md` (NEW) + +#### Security +- **Dynamic Certificate Management** - Runtime certificate changes + - Files: `developers/operations-api/certificate-management.md` (NEW) + +#### System +- **Status Report on Startup** - Service status display + - Files: Logging documentation updated + +--- + +### v4.5.0 (March 2025) + +#### Storage +- **Blob Storage** - Efficient binary object handling with streaming + - Files: `reference/blob.md` (NEW) +- **Storage Reclamation** - Automatic cleanup when storage low + - Files: `reference/storage-algorithm.md` updated + +#### Security +- **Password Hashing Upgrade** - SHA256 and Argon2id support + - Files: `developers/security/` updated +- **Certificate Revocation** - Revoked 
certificate list support + - Files: `developers/security/certificate-verification.md` (NEW) + +#### Performance +- **HTTP/2 Support** - HTTP/2 protocol + - Files: `reference/` updated +- **Property Forwarding** - Standard property access syntax + - Files: `reference/` updated + +#### Analytics +- **Resource/Storage Analytics** - Enhanced metrics + - Files: `reference/analytics.md` (NEW) + +#### APIs +- **Table.getRecordCount()** - Record counting API + - Files: Resource API documentation updated + +#### Documentation Enhancements +- **Resources Directory** - New consolidated reference section + - Files: `reference/resources/` (NEW) + - `index.md` + - `instance-binding.md` + - `migration.md` + - `query-optimization.md` + +--- + +### v4.6.0 (June 2025) + +#### AI/ML +- **Vector Indexing (HNSW)** - Hierarchical Navigable Small World algorithm + - Files: Added to operations API documentation, resource API + +#### Component System +- **New Extension API** - Dynamic reloading support + - Files: `reference/components/extensions.md` updated +- **Data Loader** - JSON data loading component + - Files: `developers/applications/data-loader.md` (NEW) +- **Plugin API** - New iteration of extension system + - Files: `reference/components/plugins.md` (NEW) + +#### Operations +- **Logging Improvements** - Component-specific logging configuration + - Files: `administration/logging/` updated +- **Resource API Upgrades** - Improved ease of use + - Files: `developers/resource-api/` updated +- **only-if-cached behavior** - Improved caching directives + - Files: `developers/applications/caching.md` updated + +--- + +### v4.7.0 (October 2025) + +#### Monitoring +- **Component Status Monitoring** - Status collection from components + - Files: Operations API updated + +#### Security +- **OCSP Support** - Online Certificate Status Protocol for revocation + - Files: `developers/security/` updated + +#### Integration +- **Analytics/Licensing** - Fabric integration + - Files: 
`reference/analytics.md` updated, `developers/operations-api/analytics.md` (NEW) + +#### Component System +- **Plugin API Improvements** - Enhanced plugin system + - Files: `reference/components/plugins.md` updated + +#### Major Reorganization +- **Components Moved to Reference** - `developers/components/` → `reference/components/` + - 9 files reorganized + +--- + +## Feature-by-Feature Version History + +### Applications + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Custom Functions** | ✅ Featured | ⚠️ In Ops API | ⚠️ In Ops API | ⚠️ In Ops API | ⚠️ Deprecated | ⚠️ Deprecated | ❌ Deprecated | +| **Component Architecture** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | +| **Applications** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | +| **Extensions** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | +| **Plugins** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | +| **Data Loader** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | +| **Define Routes** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Defining Schemas** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Defining Roles** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | +| **Caching** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | +| **Debugging** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Example Projects** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Web Applications** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | + +**Files**: +- v4.1: `custom-functions/` (12 files) +- v4.2-v4.6: `developers/applications/` (6 files) +- v4.7: `developers/applications/` (8 files) + +--- + +### Data Access APIs + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Operations API** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Resource API** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ Enhanced | ✅ Enhanced | ✅ | +| **REST Interface** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| **GraphQL** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **SQL** | ✅ | ✅ | ✅ | 
✅ | ✅ | ✅ | ✅ | +| **NoSQL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **OpenAPI Spec** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | + +**Files**: +- v4.1: `reference/` (7 files) +- v4.2: `developers/operations-api/` (16 files), `developers/resource-api/` (NEW) +- v4.7: `developers/operations-api/` (20 files), `reference/graphql.md` + +--- + +### Data Model Features + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Dynamic Schema** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Configurable Schemas** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Relationships/Joins** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | +| **Foreign Keys** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | +| **Computed Properties** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Custom Indexing** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Auto-increment Keys** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Vector Indexing** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | +| **BigInt Support** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | +| **CRDT Support** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | +| **Null Indexing** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | +| **Blob Storage** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | + +**Files**: +- v4.1-v4.2: `reference/data-types.md`, `reference/dynamic-schema.md` +- v4.3+: Enhanced data type documentation +- v4.4+: `reference/` expanded with computed properties, indexing +- v4.5+: `reference/blob.md` +- v4.6+: Vector indexing in operations API + +--- + +### Clustering & Replication + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Clustering (Legacy)** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **NATS Clustering** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Native Replication (Plexus)** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Sharding** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Clone Node** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Files**: +- 
v4.1: `clustering/` (13 files, top-level) +- v4.2-v4.7: `reference/clustering/` (13 files, moved) +- v4.4+: `developers/replication/` (NEW - 4 files) +- v4.7: `developers/operations-api/clustering-nats.md` (split from clustering.md) + +--- + +### Security Features + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Authentication** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Authorization/Roles** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **TLS/SSL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **JWT** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **LDAP** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **SAML** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Dynamic Cert Management** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| **Password Hashing Upgrade** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | +| **Certificate Revocation** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | +| **Certificate Verification** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | +| **mTLS Auth** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | +| **OCSP Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | + +**Files**: +- v4.1: `security/` (6 files, top-level) +- v4.2-v4.6: `developers/security/` (6 files) +- v4.7: `developers/security/` (8 files) + - Added: `certificate-verification.md`, `mtls-auth.md` + +--- + +### Real-Time & Messaging + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **MQTT** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| **WebSockets** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Server-Sent Events** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Files**: +- v4.2+: `developers/real-time-messaging.md` + +--- + +### Operations API - Specific Operations + +#### System Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `restart` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `get_system_information` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| 
`get_configuration` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `set_configuration` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `compact_database` | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | + +**Files**: `developers/operations-api/system-operations.md` (v4.7+) + +--- + +#### Schema Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `create_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `describe_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +--- + +#### Table Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `create_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `describe_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `create_attribute` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_attribute` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +--- + +#### NoSQL Data Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `insert` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `update` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `upsert` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `delete` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `search_by_hash` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `search_by_value` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `search_by_conditions` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Parameter Changes**: +- v4.2+: `search_attribute` → `attribute` (deprecated) +- v4.2+: `search_value` → `value` (deprecated) +- v4.2+: `search_type` → `comparator` (deprecated) + +--- + +#### SQL Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `sql` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +--- + +#### User & Role Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| 
`add_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `alter_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `user_info` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `list_users` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `add_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `alter_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +--- + +#### Clustering Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `add_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `update_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `remove_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `cluster_status` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `cluster_network` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `clone_node` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | + +**File Split** (v4.7): +- `clustering.md` → `clustering.md` + `clustering-nats.md` + +--- + +#### Custom Functions Operations (DEPRECATED) + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `get_functions` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | +| `set_functions` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | +| `drop_function` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | +| `deploy` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | +| `package` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | + +**Files**: +- v4.1: `custom-functions/custom-functions-operations.md` +- v4.2+: `developers/operations-api/custom-functions.md` +- v4.7: Marked with `:::warning Deprecated` + +--- + +#### Component Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `deploy_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| `drop_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| `package_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | +| `get_components` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Options 
Added** (tracked by version): +- `deploy_component`: + - v4.2: Initial implementation + - **Need to check**: When was `install_command` option added? + +--- + +#### Certificate Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `add_certificate` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| `list_certificates` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | +| `delete_certificate` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | + +**Files**: `developers/operations-api/certificate-management.md` (v4.4+) + +--- + +#### Analytics Operations + +| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|-----------|------|------|------|------|------|------|------| +| `get_analytics` | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | + +**Files**: `developers/operations-api/analytics.md` (v4.7) + +--- + +### Logging Features + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Standard Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | +| **Transaction Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Audit Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Component Logging** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | + +**Files**: +- v4.1: `logging.md`, `transaction-logging.md`, `audit-logging.md` (top-level) +- v4.2+: `administration/logging/` (directory) + - `index.md` + - `standard-logging.md` + - `transaction-logging.md` + - `audit-logging.md` + +--- + +### Administration Tools + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **HarperDB Studio** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| **Harper Studio** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Renamed | +| **Jobs** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Configuration** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Studio Files**: +- v4.1: `harperdb-studio/` (top-level) +- v4.2-v4.6: `administration/harperdb-studio/` (9 files) +- v4.7: 
`administration/harper-studio/` (9 files) + +**Configuration Files**: +- v4.1: `configuration.md` (top-level) +- v4.2+: `deployments/configuration.md` + +--- + +### SQL Features + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **SQL Operations** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **SQL Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Geospatial Functions** | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (consolidated) | ✅ (consolidated) | +| **Math Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **String Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Date/Time Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Geospatial Consolidation**: +- v4.1-v4.5: `sql-guide/sql-geospatial-functions/` (9 individual files) + - `geoarea.md`, `geocontains.md`, `geoconvert.md`, `geocrosses.md`, `geodifference.md`, `geodistance.md`, `geoequal.md`, `geolength.md`, `geonear.md` +- v4.6+: `reference/sql-guide/sql-geospatial-functions.md` (consolidated) + +**Files**: +- v4.1: `sql-guide/` (13 files, top-level) +- v4.2+: `reference/sql-guide/` (6 files) + +--- + +### Deployment Options + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Docker** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Linux** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Windows** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **macOS** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **HarperDB Cloud** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Files**: +- v4.1: `install-harperdb/` (top-level), `harperdb-cloud/` (top-level) +- v4.2+: `deployments/install-harperdb/`, `deployments/harperdb-cloud/` + +--- + +### SDKs & Integrations + +| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|---------|------|------|------|------|------|------|------| +| **Node.js SDK** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Python SDK** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Java Driver** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| 
**ODBC/JDBC** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| **Google Data Studio** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +**Files**: +- v4.1: `add-ons-and-sdks/` (3 files, top-level) +- v4.2: `developers/components/` includes SDK information + +--- + +## Documentation Structure Evolution + +### v4.1 Organization (Feature-Based) + +``` +version-4.1/ +├── add-ons-and-sdks/ [3 files] +├── clustering/ [13 files] +├── custom-functions/ [12 files] → DEPRECATED +├── getting-started/ [2 files] +├── harperdb-cloud/ [5 files] +├── harperdb-studio/ [9 files] +├── install-harperdb/ [3 files] +├── reference/ [7 files] +├── security/ [6 files] +├── sql-guide/ [13 files] +├── audit-logging.md +├── configuration.md +├── harperdb-cli.md +├── index.md +├── jobs.md +├── logging.md +├── support.md +├── transaction-logging.md +└── upgrade-hdb-instance.md + +Total: 92 files +``` + +--- + +### v4.2-v4.6 Organization (Role-Based) + +``` +version-4.2+/ +├── administration/ +│ ├── harper-studio/ [9 files] (v4.7 renamed) +│ ├── harperdb-studio/ [9 files] (v4.2-v4.6) +│ ├── logging/ [4 files] +│ ├── configuration.md +│ ├── jobs.md +│ └── index.md +├── deployments/ +│ ├── harperdb-cloud/ [5 files] +│ ├── install-harperdb/ [3 files] +│ ├── configuration.md +│ ├── harperdb-cli.md +│ └── index.md +├── developers/ +│ ├── applications/ [6-8 files, grew in v4.7] +│ ├── components/ [6-7 files] → moved to reference/ in v4.7 +│ ├── operations-api/ [16-20 files, grew over time] +│ ├── replication/ [4 files] (v4.4+) +│ ├── resource-api/ [multiple files] +│ ├── security/ [6-8 files] +│ ├── getting-started.md +│ ├── harperdb-applications-in-depth.md +│ ├── harperdb-cli.md +│ ├── real-time-messaging.md (v4.2+) +│ ├── rest-interface.md (v4.2+) +│ └── index.md +├── reference/ +│ ├── clustering/ [13 files] (moved from top-level) +│ ├── components/ [9 files] (v4.7+, moved from developers/) +│ ├── resources/ [4 files] (v4.5+) +│ ├── sql-guide/ [6 files] +│ ├── analytics.md (v4.5+) +│ ├── architecture.md +│ ├── blob.md (v4.5+) +│ 
├── content-types.md +│ ├── data-types.md +│ ├── dynamic-schema.md +│ ├── globals.md +│ ├── graphql.md (v4.4+) +│ ├── headers.md +│ ├── limits.md +│ ├── roles.md +│ ├── storage-algorithm.md +│ ├── transactions.md +│ └── index.md +└── index.md + +Total (v4.7): 114+ files +``` + +--- + +### Key Structural Changes + +#### v4.1 → v4.2 (Major Reorganization) + +**Moved to Administration**: +- `harperdb-studio/` → `administration/harperdb-studio/` +- `logging.md`, `transaction-logging.md`, `audit-logging.md` → `administration/logging/` +- `jobs.md` → `administration/jobs.md` + +**Moved to Deployments**: +- `install-harperdb/` → `deployments/install-harperdb/` +- `harperdb-cloud/` → `deployments/harperdb-cloud/` +- `configuration.md` → `deployments/configuration.md` +- `harperdb-cli.md` → `deployments/harperdb-cli.md` + +**Moved to Developers**: +- `security/` → `developers/security/` +- `getting-started/` → `developers/getting-started.md` + +**Moved to Reference**: +- `clustering/` → `reference/clustering/` +- `sql-guide/` → `reference/sql-guide/` + +**New Directories Created**: +- `developers/applications/` +- `developers/components/` +- `developers/operations-api/` +- `developers/resource-api/` + +**Deprecated/Removed**: +- `custom-functions/` → consolidated to `developers/operations-api/custom-functions.md` +- `add-ons-and-sdks/` → integrated into `developers/components/` + +--- + +#### v4.6 → v4.7 (Components Reorganization) + +**Moved**: +- `developers/components/` → `reference/components/` + +**Reasoning**: Components (Applications, Extensions, Plugins) became core reference documentation rather than developer tools. 
+ +--- + +## Operations API Evolution + +### Operations API File Structure by Version + +#### v4.1 (Implied from reference docs) +- Basic operations documented but not in dedicated directory + +#### v4.2 (16 files) +``` +developers/operations-api/ +├── clustering.md +├── custom-functions.md +├── index.md +├── nosql-operations.md +├── registration.md +├── schema-table-operations.md +├── sql-operations.md +├── user-role-operations.md +└── [8 more files] +``` + +#### v4.7 (20 files) +``` +developers/operations-api/ +├── analytics.md [NEW] +├── certificate-management.md [NEW - v4.4] +├── clustering-nats.md [NEW - split from clustering.md] +├── clustering.md +├── configuration.md [NEW] +├── custom-functions.md [DEPRECATED] +├── index.md +├── nosql-operations.md +├── registration.md +├── schema-table-operations.md +├── sql-operations.md +├── system-operations.md [NEW] +├── user-role-operations.md +└── [7 more files] +``` + +**Notable Additions**: +- v4.3: `compact_database` operation added to system operations +- v4.4: `certificate-management.md` for dynamic certificate operations +- v4.5: Enhanced system operations for storage reclamation +- v4.7: `analytics.md` for Fabric integration, `system-operations.md` consolidated + +**Notable Removals**: +- v4.7: `utilities.md` removed (content integrated elsewhere) + +--- + +## Consolidation Action Items + +### Phase 1: Deprecated Features Documentation + +#### 1.1 Custom Functions (CRITICAL) + +**Goal**: Create comprehensive legacy documentation with clear deprecation warnings and migration paths. + +**Actions**: +- [ ] Create new section: `legacy/custom-functions/` +- [ ] Migrate all 12 files from v4.1 `custom-functions/` directory +- [ ] Add deprecation banner to every page: + ```markdown + :::warning Deprecated in v4.7 + Custom Functions have been deprecated as of v4.7.0 (October 2025) and replaced by the [Component Architecture](/developers/applications/). 
+ + **Migration Path**: + - For HTTP routes and APIs → [Applications](/developers/applications/) + - For background services → [Extensions](/reference/components/extensions/) + - For system integrations → [Plugins](/reference/components/plugins/) + + See the [Migration Guide](/legacy/custom-functions/migration-guide/) for detailed instructions. + ::: + ``` +- [ ] Create `legacy/custom-functions/migration-guide.md` with: + - Side-by-side code examples (Custom Functions vs. Applications) + - Feature mapping table + - Common migration scenarios + - Troubleshooting section +- [ ] Add version labels to all Custom Functions operations in Operations API docs +- [ ] Create redirect from old paths to legacy section + +**Files to Migrate**: +1. `create-project.md` +2. `custom-functions-operations.md` +3. `debugging-custom-function.md` +4. `define-helpers.md` +5. `define-routes.md` +6. `example-projects.md` +7. `host-static.md` +8. `requirements-definitions.md` +9. `templates.md` +10. `using-npm-git.md` + +--- + +#### 1.2 Deprecated NoSQL Parameters + +**Goal**: Document parameter evolution with clear version labels. + +**Actions**: +- [ ] Add "Parameter History" section to `nosql-operations.md` +- [ ] Create comparison table: + ```markdown + | Deprecated (v4.1) | Current (v4.2+) | Status | + |------------------|-----------------|---------| + | `search_attribute` | `attribute` | Deprecated | + | `search_value` | `value` | Deprecated | + | `search_type` | `comparator` | Deprecated | + ``` +- [ ] Show side-by-side examples with version labels +- [ ] Add deprecation warnings to examples using old parameters +- [ ] Document when support might be fully removed + +--- + +### Phase 2: Renamed Features + +#### 2.1 HarperDB Studio → Harper Studio + +**Goal**: Use current naming while acknowledging historical name. 
+ +**Actions**: +- [ ] Use "Harper Studio" as primary name throughout consolidated docs +- [ ] Add note at top of Harper Studio section: + ```markdown + :::info Historical Note + Harper Studio was previously known as "HarperDB Studio" in versions prior to v4.7. + ::: + ``` +- [ ] Create redirect rule: `/administration/harperdb-studio/*` → `/administration/harper-studio/*` +- [ ] Update all cross-references to use new name +- [ ] Merge v4.1 `manage-functions.md` content into legacy/custom-functions section +- [ ] Document feature evolution: + - v4.1: `manage-functions.md`, `manage-charts.md` + - v4.7: `manage-applications.md`, `manage-replication.md` + +--- + +### Phase 3: Relocated/Reorganized Features + +#### 3.1 Clustering Documentation + +**Goal**: Consolidate under reference section with version history. + +**Actions**: +- [ ] Use v4.7 structure: `reference/clustering/` +- [ ] Add version note explaining the relocation: + ```markdown + :::info Location History + - v4.1: Top-level `clustering/` directory + - v4.2+: Moved to `reference/clustering/` + ::: + ``` +- [ ] Ensure all 13 files are present +- [ ] Update cross-references throughout documentation +- [ ] Add links to related features: + - Native Replication (v4.4+): `developers/replication/` + - Clustering Operations: `developers/operations-api/clustering.md` + - NATS Clustering: `developers/operations-api/clustering-nats.md` + +--- + +#### 3.2 Components Documentation + +**Goal**: Place in reference section with clear distinction between Applications, Extensions, and Plugins. 
+ +**Actions**: +- [ ] Use v4.7 structure: `reference/components/` +- [ ] Add version timeline: + ```markdown + ## Version History + + - **v4.2**: Component Architecture introduced + - **v4.6**: Plugin API introduced + - **v4.7**: Components documentation moved to reference section + ``` +- [ ] Create clear sections: + - `applications.md` - HTTP routes, APIs, web applications + - `extensions.md` - Background services, data processing + - `plugins.md` - System integrations (v4.6+) + - `built-in-extensions.md` - Core system extensions + - `configuration.md` - Component configuration +- [ ] Cross-reference to `developers/applications/` for hands-on guides +- [ ] Document evolution from Custom Functions → Components + +--- + +#### 3.3 SQL Geospatial Functions + +**Goal**: Use consolidated file with internal navigation. + +**Actions**: +- [ ] Use v4.6+ structure: Single `reference/sql-guide/sql-geospatial-functions.md` +- [ ] Ensure all 9 functions are documented: + 1. `GEOAREA()` + 2. `GEOCONTAINS()` + 3. `GEOCONVERT()` + 4. `GEOCROSSES()` + 5. `GEODIFFERENCE()` + 6. `GEODISTANCE()` + 7. `GEOEQUAL()` + 8. `GEOLENGTH()` + 9. `GEONEAR()` +- [ ] Add table of contents at top for easy navigation +- [ ] Use consistent format for each function: + - Syntax + - Parameters + - Return value + - Examples + - Version availability +- [ ] Note the consolidation at the top: + ```markdown + :::info Documentation Consolidation + Prior to v4.6, each geospatial function was documented in a separate file. This documentation has been consolidated for easier reference. + ::: + ``` + +--- + +### Phase 4: Version-Specific Feature Documentation + +#### 4.1 Operations API Operations + +**Goal**: Document all operations with version introduced and option changes. 
+ +**Actions**: +- [ ] Create "Version History" section for each operation +- [ ] Format example for `deploy_component`: + ```markdown + ## deploy_component + + **Introduced**: v4.2.0 + + ### Syntax + ```json + { + "operation": "deploy_component", + "project": "string", + "package": "string" // optional + } + ``` + + ### Options + + | Option | Type | Required | Introduced | Description | + |--------|------|----------|------------|-------------| + | `project` | string | Yes | v4.2 | Component project path | + | `package` | string | No | v4.2 | Package file path | + | `install_command` | string | No | v4.X | Custom install command | + + ### Version History + + - **v4.2.0**: Operation introduced + - **v4.X.0**: Added `install_command` option [NEED TO VERIFY VERSION] + ``` + +**Operations Requiring Version Tracking**: +1. `compact_database` (v4.3) +2. `clone_node` (v4.2) +3. All certificate operations (v4.4): + - `add_certificate` + - `list_certificates` + - `delete_certificate` +4. Component operations (v4.2): + - `deploy_component` - Track option additions + - `drop_component` + - `package_component` + - `get_components` +5. `get_analytics` (v4.5) + +**RESEARCH NEEDED**: +- [ ] When was `install_command` added to `deploy_component`? +- [ ] Review each operation's release notes for option additions + +--- + +#### 4.2 Data Model Features + +**Goal**: Document feature availability by version. + +**Actions**: +- [ ] Create "Feature Availability" tables in relevant sections +- [ ] Example for Relationships documentation: + ```markdown + ## Relationships + + **Introduced**: v4.3.0 (Tucker Release) + + Harper allows you to define relationships between tables using foreign keys. 
+ + ### Feature Matrix + + | Feature | Since | Description | + |---------|-------|-------------| + | Foreign Keys | v4.3 | Link records across tables | + | Many-to-One | v4.3 | Multiple records reference one record | + | One-to-Many | v4.3 | One record references multiple records | + | Cascade Delete | v4.3 | Automatic deletion of related records | + ``` + +**Features Requiring Version Labels**: +1. Relationships & Joins (v4.3) +2. Computed Properties (v4.4) +3. Custom Indexing (v4.4) +4. Auto-increment Keys (v4.4) +5. Vector Indexing (v4.6) +6. BigInt Support (v4.3) +7. CRDT Support (v4.3) +8. Null Indexing (v4.3) +9. Blob Storage (v4.5) + +--- + +#### 4.3 API Features + +**Goal**: Document API evolution and new endpoints. + +**Actions**: +- [ ] Create API comparison table: + ```markdown + ## Data Access APIs + + | API | Introduced | Primary Use Case | + |-----|------------|------------------| + | Operations API | v4.0 | Administrative and data operations | + | Resource API | v4.2 | Unified data access interface | + | REST Interface | v4.2 | RESTful data access | + | GraphQL | v4.4 | Graph-based querying | + | SQL | v4.0 | Relational queries | + | NoSQL | v4.0 | Document operations | + ``` +- [ ] Add "API Evolution" timeline graphic/section +- [ ] Cross-reference between API types with version context + +--- + +#### 4.4 Security Features + +**Goal**: Document security enhancements by version. 
+ +**Actions**: +- [ ] Create security features timeline: + ```markdown + ## Security Feature Timeline + + ### Core Authentication & Authorization (v4.0+) + - Username/Password authentication + - Role-based access control (RBAC) + - JWT token authentication + - LDAP integration + - SAML 2.0 support + + ### TLS/SSL Enhancements + - **v4.0**: Basic TLS/SSL support + - **v4.4**: Dynamic certificate management + - **v4.5**: Certificate revocation lists + - **v4.5**: Certificate verification + - **v4.7**: mTLS authentication + - **v4.7**: OCSP support + + ### Password Security + - **v4.0-v4.4**: SHA256 hashing + - **v4.5**: Argon2id support (recommended) + ``` +- [ ] Add security best practices with version recommendations +- [ ] Document migration paths for security upgrades + +--- + +### Phase 5: New Subsystem Documentation + +#### 5.1 Native Replication (v4.4+) + +**Goal**: Comprehensive replication documentation separate from legacy clustering. + +**Actions**: +- [ ] Ensure `developers/replication/` directory is complete +- [ ] Add clear distinction from legacy clustering: + ```markdown + ## Replication vs. Clustering + + Harper offers two approaches to distributed data: + + ### Native Replication (Plexus) - Introduced v4.4 + - WebSocket-based communication + - Automatic conflict resolution + - Sharding support + - Recommended for new deployments + - Documentation: `developers/replication/` + + ### Legacy Clustering (NATS-based) - v4.0+ + - NATS message bus communication + - Manual conflict resolution + - No sharding + - Documentation: `reference/clustering/` + ``` +- [ ] Include migration guide from clustering to replication +- [ ] Cross-reference to: + - `reference/clustering/` (legacy) + - `developers/operations-api/clustering.md` + - `developers/operations-api/clustering-nats.md` + +--- + +#### 5.2 Blob Storage (v4.5+) + +**Goal**: Complete blob storage documentation. 
+ +**Actions**: +- [ ] Ensure `reference/blob.md` covers: + - What qualifies as blob data + - Streaming APIs + - Storage locations + - Performance characteristics + - Size limits + - Version: v4.5+ +- [ ] Add examples for: + - Storing images + - Storing videos + - Storing documents + - Streaming large files +- [ ] Cross-reference to: + - Storage algorithm documentation + - Resource API (for blob access) + - Storage analytics + +--- + +#### 5.3 Vector Indexing (v4.6+) + +**Goal**: Comprehensive vector search documentation. + +**Actions**: +- [ ] Ensure vector indexing documentation covers: + - HNSW algorithm explanation + - Use cases (similarity search, embeddings) + - Index creation + - Query syntax + - Performance tuning + - Version: v4.6+ +- [ ] Add examples for: + - Semantic search + - Image similarity + - Recommendation systems +- [ ] Cross-reference to: + - Custom indexing (v4.4) + - Computed properties + - Resource API query syntax + +--- + +### Phase 6: Cross-Version References + +#### 6.1 Version Labels + +**Goal**: Consistent version labeling throughout documentation. + +**Action**: Add version badges to features: +```markdown +## Auto-Incrementing Primary Keys v4.4+ + +Harper supports automatic generation of numeric primary keys. +``` + +**Standard Badge Types**: +- `v4.X+` - Feature introduced +- `Deprecated v4.7` - Deprecated feature +- `Removed v4.X` - Removed feature + +--- + +#### 6.2 Version-Specific Notes + +**Goal**: Call out version-specific behavior. + +**Standard Format**: +```markdown +:::info Version 4.3+ +This feature requires HarperDB v4.3 or later. +::: + +:::warning Versions prior to 4.5 +Password hashing in versions prior to 4.5 uses SHA256. Upgrade to v4.5+ for Argon2id support. +::: + +:::danger Breaking Change in v4.2 +The `search_attribute` parameter was deprecated in v4.2. Use `attribute` instead. 
+::: +``` + +--- + +### Phase 7: Navigation & Organization + +#### 7.1 Consolidated Sidebar + +**Goal**: Create unified sidebar that accommodates all versions. + +**Proposed Structure**: +```javascript +{ + "docsSidebar": [ + { + "type": "doc", + "id": "index", + "label": "Introduction" + }, + { + "type": "category", + "label": "Developers", + "items": [ + { + "type": "category", + "label": "Applications", + "items": [ + "developers/applications/index", + "developers/applications/define-routes", + "developers/applications/defining-schemas", + "developers/applications/defining-roles", + "developers/applications/caching", + "developers/applications/debugging", + "developers/applications/data-loader", + "developers/applications/example-projects", + "developers/applications/web-applications" + ] + }, + { + "type": "category", + "label": "Data Access APIs", + "items": [ + "developers/resource-api/index", + "developers/rest-interface", + "developers/operations-api/index", + "developers/graphql" + ] + }, + { + "type": "category", + "label": "Replication", + "link": { + "type": "doc", + "id": "developers/replication/index" + }, + "items": [ + "developers/replication/configuration", + "developers/replication/monitoring", + "developers/replication/troubleshooting" + ] + }, + { + "type": "category", + "label": "Security", + "items": [ + "developers/security/index", + "developers/security/authentication", + "developers/security/authorization", + "developers/security/tls-ssl", + "developers/security/jwt", + "developers/security/ldap", + "developers/security/saml", + "developers/security/certificate-verification", + "developers/security/mtls-auth" + ] + }, + "developers/real-time-messaging", + "developers/harperdb-applications-in-depth", + "developers/getting-started" + ] + }, + { + "type": "category", + "label": "Administration", + "items": [ + { + "type": "category", + "label": "Harper Studio", + "items": [ + "administration/harper-studio/index", + 
"administration/harper-studio/manage-applications", + "administration/harper-studio/manage-replication", + // ... other studio files + ] + }, + { + "type": "category", + "label": "Logging", + "items": [ + "administration/logging/index", + "administration/logging/standard-logging", + "administration/logging/transaction-logging", + "administration/logging/audit-logging" + ] + }, + "administration/configuration", + "administration/jobs" + ] + }, + { + "type": "category", + "label": "Deployments", + "items": [ + { + "type": "category", + "label": "Install HarperDB", + "items": [ + "deployments/install-harperdb/docker", + "deployments/install-harperdb/linux", + "deployments/install-harperdb/windows" + ] + }, + { + "type": "category", + "label": "HarperDB Cloud", + "items": [ + // cloud files + ] + }, + "deployments/configuration", + "deployments/harperdb-cli" + ] + }, + { + "type": "category", + "label": "Reference", + "link": { + "type": "doc", + "id": "reference/index" + }, + "items": [ + { + "type": "category", + "label": "Components", + "items": [ + "reference/components/index", + "reference/components/applications", + "reference/components/extensions", + "reference/components/plugins", + "reference/components/built-in-extensions", + "reference/components/configuration" + ] + }, + { + "type": "category", + "label": "Clustering", + "items": [ + // all 13 clustering files + ] + }, + { + "type": "category", + "label": "SQL Guide", + "items": [ + "reference/sql-guide/index", + "reference/sql-guide/sql-functions", + "reference/sql-guide/sql-geospatial-functions", + "reference/sql-guide/sql-math-functions", + "reference/sql-guide/sql-string-functions", + "reference/sql-guide/sql-datetime-functions" + ] + }, + { + "type": "category", + "label": "Resources", + "items": [ + "reference/resources/index", + "reference/resources/instance-binding", + "reference/resources/migration", + "reference/resources/query-optimization" + ] + }, + "reference/architecture", + 
"reference/analytics", + "reference/blob", + "reference/content-types", + "reference/data-types", + "reference/dynamic-schema", + "reference/globals", + "reference/graphql", + "reference/headers", + "reference/limits", + "reference/roles", + "reference/storage-algorithm", + "reference/transactions" + ] + }, + { + "type": "category", + "label": "Legacy Features", + "items": [ + { + "type": "category", + "label": "Custom Functions (Deprecated)", + "items": [ + "legacy/custom-functions/index", + "legacy/custom-functions/migration-guide", + "legacy/custom-functions/create-project", + "legacy/custom-functions/custom-functions-operations", + "legacy/custom-functions/debugging-custom-function", + "legacy/custom-functions/define-helpers", + "legacy/custom-functions/define-routes", + "legacy/custom-functions/example-projects", + "legacy/custom-functions/host-static", + "legacy/custom-functions/requirements-definitions", + "legacy/custom-functions/templates", + "legacy/custom-functions/using-npm-git" + ] + } + ] + }, + { + "type": "doc", + "id": "support", + "label": "Support" + } + ] +} +``` + +--- + +#### 7.2 Version Switcher + +**Goal**: Allow users to view version-specific documentation. + +**Options**: +1. **Unified docs with version labels** (Recommended) + - Single documentation tree + - Features labeled with version badges + - Deprecated features in separate "Legacy" section + - Pros: Easier to maintain, comprehensive view + - Cons: More complex individual pages + +2. **Version dropdown for major versions** + - Keep separate docs for v4.1, v4.2, etc. + - Add consolidated "Latest (v4.7)" version + - Pros: Version-accurate documentation + - Cons: Harder to maintain, fragmented + +**Recommendation**: Use unified docs with version labels, keep versioned docs archived for reference. 
+ +--- + +### Phase 8: Migration Guides + +#### 8.1 Custom Functions to Components + +**Create**: `legacy/custom-functions/migration-guide.md` + +**Contents**: +- Introduction to Component Architecture +- Feature comparison table +- Step-by-step migration process +- Code examples (before/after) +- Common pitfalls +- FAQ + +--- + +#### 8.2 Legacy Clustering to Native Replication + +**Create**: `developers/replication/migration-from-clustering.md` + +**Contents**: +- Why migrate to Native Replication +- Feature comparison +- Migration process +- Downtime considerations +- Rollback procedures +- FAQ + +--- + +#### 8.3 NoSQL Parameter Updates + +**Create**: `developers/operations-api/nosql-parameter-migration.md` + +**Contents**: +- Parameter mappings +- Code examples +- Automated migration scripts +- Backward compatibility notes + +--- + +### Phase 9: Testing & Validation + +#### 9.1 Link Validation + +**Actions**: +- [ ] Run link checker on all internal links +- [ ] Verify all cross-references point to correct files +- [ ] Test version badge rendering +- [ ] Verify code examples compile/run + +--- + +#### 9.2 Version Accuracy + +**Actions**: +- [ ] Review each version label against release notes +- [ ] Verify operation availability by version +- [ ] Test feature examples on appropriate versions +- [ ] Confirm deprecation timelines + +--- + +#### 9.3 Navigation Testing + +**Actions**: +- [ ] Test sidebar navigation +- [ ] Verify search functionality finds all relevant results +- [ ] Test breadcrumb navigation +- [ ] Verify "Next/Previous" page links + +--- + +## Summary Statistics + +### Documentation Growth + +| Metric | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | +|--------|------|------|------|------|------|------|------| +| **Total Files** | 92 | 101 | 101+ | 101+ | 114+ | 114+ | 114+ | +| **Top-level Dirs** | 11 | 5 | 5 | 5 | 5 | 5 | 5 | +| **Major Features** | Baseline | +7 | +9 | +8 | +6 | +5 | +4 | +| **Deprecations** | 0 | 1 | 0 | 0 | 0 | 0 | 1 | + +--- 
+ +### Feature Categories + +| Category | Features Added | Versions | +|----------|---------------|----------| +| **Architecture** | Component Architecture, Plugins | v4.2, v4.6 | +| **Data Model** | Relationships, Computed Props, Vector Index, Blob | v4.3, v4.4, v4.5, v4.6 | +| **APIs** | Resource API, REST, GraphQL | v4.2, v4.4 | +| **Clustering** | Native Replication, Sharding | v4.4 | +| **Security** | Dynamic Certs, Argon2id, OCSP, mTLS | v4.4, v4.5, v4.7 | +| **Real-time** | MQTT, WebSocket, SSE | v4.2 | +| **Developer Tools** | OpenAPI, CLI expansion, Data Loader | v4.3, v4.6 | + +--- + +## Next Steps + +1. **Review this mapping** with stakeholders for accuracy +2. **Research missing details**: + - When was `install_command` added to `deploy_component`? + - Any other operation option additions? +3. **Prioritize consolidation phases**: + - Phase 1 (Deprecated features) - High priority + - Phase 4 (Version-specific features) - High priority + - Phase 2-3 (Renames/reorganization) - Medium priority + - Phase 5-9 (New systems, navigation) - Lower priority +4. 
**Begin implementation** starting with deprecated features and version labeling + +--- + +## Document Maintenance + +**Last Updated**: 2026-02-05 +**Next Review**: After consolidation implementation +**Owner**: Documentation Team + +**Change Log**: +- 2026-02-05: Initial comprehensive analysis created From ad55dcece07970614b0397d718dee866234fa6b7 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 19 Feb 2026 11:09:14 -0700 Subject: [PATCH 03/51] update plans --- v4-docs-migration-map.md | 237 ++++++++++++++++++++++++++++++++++++-- v4-docs-reference-plan.md | 7 +- 2 files changed, 234 insertions(+), 10 deletions(-) diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 19a308b7..2983c57a 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -25,18 +25,26 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Merge Required**: Yes - CLI commands added across versions - **Version Annotations**: Track command additions from v4.1 → v4.7 - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API commands + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Dev mode (`harperdb dev`, `harperdb run`) ### `reference/cli/commands.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: Compare all versions for command evolution - **Version Annotations**: Each command should note its introduction version - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion + - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Foreground mode changes ### `reference/cli/operations-api-commands.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) - **Version Annotations**: Note v4.3.0 introduction - **Status**: Not 
Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API ### `reference/cli/authentication.md` - **Primary Source**: New content or extract from CLI docs @@ -52,6 +60,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/configuration.md` - `versioned_docs/version-4.1/configuration.md` (baseline) - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Major config changes (http section, componentRoot) + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Configuration improvements + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Developer/production mode ### `reference/configuration/options.md` - **Primary Source**: Current `reference/configuration.md` (very comprehensive) @@ -60,6 +72,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: Each config option needs version introduced - **Status**: Not Started - **Notes**: This will be a large migration task - the current configuration.md is 59KB +- **Release Notes**: Major config changes across many versions - see all major releases ### `reference/configuration/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/configuration.md` @@ -111,6 +124,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` - **Version Annotations**: Added in v4.3.0 - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS support added ### `reference/security/certificate-management.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-management.md` @@ -120,11 +135,16 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Merge Required**: Yes - dynamic certificate management added 
in v4.4 - **Version Annotations**: Dynamic certs added v4.4.0 - **Status**: Not Started +- **Release Notes**: + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Dynamic certificate management + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Certificate revocation ### `reference/security/certificate-verification.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-verification.md` - **Version Annotations**: Added in v4.7.0 (OCSP support) - **Status**: Not Started +- **Release Notes**: + - [4.7.0](release-notes/v4-tucker/4.7.0.md) - OCSP support ### `reference/security/cors.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/configuration.md` @@ -142,6 +162,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/defining-roles.md` - **Merge Required**: Yes - content spread across multiple files - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Cookie-based sessions --- @@ -161,6 +184,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Plugin API: v4.6.0 - **Status**: Not Started - **Notes**: This is a critical page that explains the evolution +- **Release Notes**: + - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Custom functions with worker threads + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture introduced + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API with dynamic reloading ### `reference/components/applications.md` - **Primary Source**: `versioned_docs/version-4.7/reference/components/applications.md` @@ -169,18 +196,25 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/components/applications.md` - **Merge Required**: Yes - application developer docs scattered across multiple files - 
**Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture, NPM/GitHub deployment ### `reference/components/extension-api.md` - **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` - **Additional Sources**: Current `reference/components/extensions.md` - **Version Annotations**: Extension API formalized around v4.4-4.5 - **Status**: Not Started +- **Release Notes**: + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API ### `reference/components/plugin-api.md` - **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` - **Additional Sources**: Current `reference/components/plugins.md` - **Version Annotations**: Added in v4.6.0 - **Status**: Not Started +- **Release Notes**: + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Plugin API introduced + - [4.7.0](release-notes/v4-tucker/4.7.0.md) - Further plugin API improvements --- @@ -193,6 +227,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/architecture.md` - **Status**: Not Started - **Notes**: Should explain Resources + Schema + Auto-REST relationship +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Database structure changes (single file per database) ### `reference/database/schema.md` - **Primary Source**: `versioned_docs/version-4.7/developers/applications/defining-schemas.md` @@ -214,17 +250,27 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Vector indexing: v4.6.0 - **Status**: Not Started - **Notes**: Large consolidation - may want to keep blobs/vectors separate +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Configurable schemas with GraphQL syntax + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Relationships and joins, indexing nulls, BigInt support + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Computed properties, custom indexing, auto-incrementing 
 primary keys + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Blob storage + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Vector indexing (HNSW) ### `reference/database/data-loader.md` - **Primary Source**: `versioned_docs/version-4.7/developers/applications/data-loader.md` - **Additional Sources**: Current `reference/data-loader.md` - **Version Annotations**: Added in v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Data loader introduced (note: conflicts with the 'Added in v4.5.0' annotation above — verify the actual introduction version against the release notes before migrating) ### `reference/database/storage-algorithm.md` - **Primary Source**: `versioned_docs/version-4.7/reference/storage-algorithm.md` - **Additional Sources**: Current `reference/storage-algorithm.md` - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Storage performance improvements, compression by default ### `reference/database/jobs.md` - **Primary Source**: `versioned_docs/version-4.7/administration/jobs.md` @@ -245,6 +291,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Current `reference/compact.md` - **Version Annotations**: Added in v4.3.0 - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Compact database functionality ### `reference/database/transaction.md` - **Primary Source**: `versioned_docs/version-4.7/administration/logging/transaction-logging.md` @@ -256,6 +304,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: Transaction logging available since v4.1.0, audit logging since v4.1.0 - **Status**: Not Started - **Notes**: Consolidated from separate audit and transaction logging pages +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Balanced audit log cleanup + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Transaction reuse, storage reclamation (audit log eviction) ---
from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` - **Additional Sources**: Current `reference/resources/` folder - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced ### `reference/resources/resource-api.md` - **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` @@ -279,6 +332,12 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - loadAsInstance changes: v4.4.0+ - Response objects: v4.4.0 - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CRDT support + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Response object support + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Property forwarding + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Resource API upgrades ### `reference/resources/global-apis.md` - **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` @@ -290,11 +349,15 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: Various APIs added across versions - **Status**: Not Started - **Notes**: Should reference out to http/api.md for `server` global +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Table.getRecordCount() ### `reference/resources/query-optimization.md` - **Primary Source**: `versioned_docs/version-4.7/reference/resources/query-optimization.md` - **Additional Sources**: Current `reference/resources/query-optimization.md` - **Status**: Not Started +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Query optimizations --- @@ -305,6 +368,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Built-in extensions docs, configuration docs - **Version Annotations**: loadEnv added in 
v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component ### `reference/environment-variables/configuration.md` - **Primary Source**: Extract from configuration docs or components docs @@ -331,12 +396,17 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: New content about HTTP server - **Additional Sources**: Configuration docs, architecture docs - **Status**: Not Started +- **Release Notes**: + - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Worker threads for HTTP requests + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Socket management (SO_REUSEPORT), flexible port configs ### `reference/http/configuration.md` - **Primary Source**: Extract from `reference/configuration.md` (http section) - **Version Annotations**: - HTTP/2 support: v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - HTTP/2 support ### `reference/http/api.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (server global) @@ -344,6 +414,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - server.authenticateUser: v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - server.authenticateUser API --- @@ -353,6 +425,8 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/rest.md` - **Additional Sources**: Current `reference/rest.md` - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - REST interface introduced ### `reference/rest/querying.md` - **Primary Source**: Extract from REST docs and NoSQL operations @@ -362,6 +436,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Null indexing/querying: v4.3.0 - URL 
path improvements: v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Iterator-based queries + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Relationships/joins, sorting, nested select, null indexing + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved URL path parsing, directURLMapping ### `reference/rest/headers.md` - **Primary Source**: `versioned_docs/version-4.7/reference/headers.md` @@ -378,10 +456,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` - **Additional Sources**: Current `reference/real-time.md` - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - WebSocket support ### `reference/rest/server-sent-events.md` - **Primary Source**: Extract from real-time or REST docs - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Server-Sent Events support --- @@ -396,11 +478,17 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Single-level wildcards: v4.3.0 - CRDT: v4.3.0 - **Status**: Not Started +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - MQTT support introduced (QoS 0 and 1, durable sessions) + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS, single-level wildcards, retain handling, CRDT + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved message delivery, blob support for MQTT ### `reference/mqtt/configuration.md` - **Primary Source**: Extract from configuration docs and real-time docs - **Version Annotations**: Port change v4.5.0 (9925 → 9933) - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Default replication port change --- @@ -410,6 +498,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: 
`versioned_docs/version-4.7/administration/logging/index.md` - **Additional Sources**: Current `reference/logging.md` (if exists) - **Status**: Not Started +- **Release Notes**: + - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Logging revamped, consolidated into hdb.log + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Major logging improvements ### `reference/logging/configuration.md` - **Primary Source**: Extract from configuration docs @@ -417,10 +508,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Per-component logging: v4.6.0 - Granular configuration: v4.6.0 - **Status**: Not Started +- **Release Notes**: + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Per-component logging, dynamic reloading, HTTP logging ### `reference/logging/api.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) - **Status**: Not Started +- **Release Notes**: + - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Logger based on Node.js Console API ### `reference/logging/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` @@ -438,6 +533,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Resource analytics: v4.5.0 - Storage analytics: v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Resource and storage analytics + - [4.7.0](release-notes/v4-tucker/4.7.0.md) - New analytics and licensing functionality ### `reference/analytics/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` @@ -453,6 +551,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Native Replication (Plexus): v4.4.0 - **Status**: Not Started +- **Release Notes**: + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native replication system (Plexus), replicated operations + - 
[4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information, improved replication timestamps ### `reference/replication/clustering.md` - **Primary Source**: `versioned_docs/version-4.7/reference/clustering/index.md` @@ -463,6 +564,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Merge Required**: Yes - extensive clustering documentation needs consolidation - **Status**: Not Started - **Notes**: Large section with many sub-pages +- **Release Notes**: + - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Clone node functionality + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native replication with PKI/mTLS + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information ### `reference/replication/sharding.md` - **Primary Source**: `versioned_docs/version-4.7/developers/replication/sharding.md` @@ -470,6 +575,9 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Sharding: v4.4.0 - Expanded functionality: v4.5.0 - **Status**: Not Started +- **Release Notes**: + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Sharding introduced + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Expanded sharding functionality --- @@ -483,6 +591,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Disabled by default: v4.5.0 - **Status**: Not Started - **Notes**: Mark as experimental/incomplete +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - OpenAPI specification endpoint + - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native GraphQL support (provisional) + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - GraphQL configuration, disabled by default --- @@ -494,6 +606,18 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Merge Required**: Maybe - consolidate or keep nested? 
- **Status**: Not Started - **Notes**: May want to keep as nested folder or consolidate into single page +- **Release Notes**: + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Local studio upgrade to match online version + +--- + +## Fastify Routes Section + +### `reference/fastify-routes/overview.md` +- **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` +- **Additional Sources**: Current `reference/define-routes.md` +- **Status**: Not Started +- **Notes**: Discouraged in favor of modern routing with components, but still a supported feature. --- @@ -518,12 +642,6 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Status**: N/A - **Notes**: Move entire section as-is with deprecation notice -### `reference/legacy/fastify-routes/` -- **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` -- **Additional Sources**: Current `reference/define-routes.md` -- **Status**: N/A -- **Notes**: Deprecated in favor of modern routing - --- ## Files Requiring Special Attention @@ -597,10 +715,115 @@ For each file migrated, ensure: - [ ] Operations APIs include "Added in:" notes - [ ] Links to related version-specific content +## Release Notes Reference Guide + +The `release-notes/v4-tucker/` directory contains 169 release note files covering the entire v4.0 - v4.7 series. 
Key major releases with significant feature additions: + +### Major Releases + +- **[4.1.0](release-notes/v4-tucker/4.1.0.md)** (Worker threads, iterator-based queries, logging revamp) +- **[4.2.0](release-notes/v4-tucker/4.2.0.md)** (Resource API, Component Architecture, REST interface, MQTT/WebSockets/SSE, configurable schemas) +- **[4.3.0](release-notes/v4-tucker/4.3.0.md)** (Relationships/joins, null indexing, CLI expansion, mTLS, BigInt, compaction) +- **[4.4.0](release-notes/v4-tucker/4.4.0.md)** (Native replication/Plexus, sharding, computed properties, custom indexing, GraphQL, dynamic certificates) +- **[4.5.0](release-notes/v4-tucker/4.5.0.md)** (Blob storage, password hashing upgrade, HTTP/2, expanded sharding, loadEnv component) +- **[4.6.0](release-notes/v4-tucker/4.6.0.md)** (Vector indexing/HNSW, new extension API, logging improvements, data loader) +- **[4.7.0](release-notes/v4-tucker/4.7.0.md)** (Component status monitoring, OCSP, new analytics/licensing) + +### Feature-to-Release Note Mapping + +When adding version annotations, refer to these key features and their introduction versions: + +**CLI & Configuration** +- CLI expansion with operations API: 4.3.0 +- Dev mode (`harperdb dev`): 4.2.0 +- Configuration improvements: 4.3.0, 4.4.0 + +**Security** +- mTLS support: 4.3.0 +- Dynamic certificate management: 4.4.0 +- OCSP support: 4.7.0 +- Password hashing (sha256, argon2id): 4.5.0 +- Certificate revocation: 4.5.0 + +**Components & Extensions** +- Component architecture: 4.2.0 +- Custom functions with worker threads: 4.1.0 +- New extension API: 4.6.0 +- Plugin API: 4.6.0, 4.7.0 +- Built-in loadEnv component: 4.5.0 + +**Database & Schema** +- Configurable schemas (GraphQL syntax): 4.2.0 +- Relationships and joins: 4.3.0 +- Computed properties: 4.4.0 +- Custom indexing: 4.4.0 +- Blob storage: 4.5.0 +- Vector indexing (HNSW): 4.6.0 +- BigInt support: 4.3.0 +- Null indexing: 4.3.0 +- Auto-incrementing primary keys: 4.4.0 + +**Data Access** +- Resource 
API: 4.2.0 +- CRDT support: 4.3.0 +- Response object support: 4.4.0 +- Property forwarding: 4.5.0 +- Data loader: 4.6.0 +- Iterator-based queries: 4.1.0 + +**REST & HTTP** +- REST interface: 4.2.0 +- HTTP/2 support: 4.5.0 +- Improved URL path parsing: 4.5.0 +- server.authenticateUser API: 4.5.0 +- Worker threads for HTTP: 4.1.0 + +**Real-Time & MQTT** +- MQTT support: 4.2.0 +- WebSocket support: 4.2.0 +- Server-Sent Events: 4.2.0 +- MQTT mTLS: 4.3.0 +- MQTT single-level wildcards: 4.3.0 +- MQTT retain handling: 4.3.0 +- Improved message delivery: 4.5.0 + +**Replication & Clustering** +- Native replication (Plexus): 4.4.0 +- Sharding: 4.4.0 +- Expanded sharding functionality: 4.5.0 +- Clone node: 4.2.0 +- Replicated operations: 4.4.0 + +**Logging** +- Logging consolidated to hdb.log: 4.1.0 +- Per-component logging: 4.6.0 +- Dynamic logging reload: 4.6.0 +- HTTP logging: 4.6.0 + +**GraphQL** +- OpenAPI specification: 4.3.0 +- Native GraphQL support (provisional): 4.4.0 +- GraphQL disabled by default: 4.5.0 + +**Storage & Performance** +- Database structure (single file): 4.2.0 +- Storage performance improvements: 4.3.0 +- Compression by default: 4.3.0 +- Compact database: 4.3.0 +- Storage reclamation: 4.5.0 + +**Analytics** +- Resource and storage analytics: 4.5.0 +- New analytics/licensing: 4.7.0 + +**Studio** +- Local studio upgrade: 4.3.0 + ## Notes - Many current `reference/` files appear to already be partially reorganized - The `versioned_docs/` folders contain the historical record - Compare git history to validate when features were actually introduced -- Use release notes to cross-reference feature versions +- Use release notes to cross-reference feature versions (see Release Notes Reference Guide above) - Consider scripting the version annotation process for configuration options +- The release notes directory contains 169 files - use the Feature-to-Release Note Mapping above as a quick reference diff --git a/v4-docs-reference-plan.md 
b/v4-docs-reference-plan.md index fd1eab9f..a429b78d 100644 --- a/v4-docs-reference-plan.md +++ b/v4-docs-reference-plan.md @@ -339,14 +339,15 @@ reference/ ├── studio/ │ └── overview.md # Studio documentation (still ships with v4 but moving to legacy) │ +├── fastify-routes/ +│ └── overview.md # Fastify routes documentation (discouraged in favor of components) +│ └── legacy/ ├── cloud/ # Legacy cloud documentation (replaced by Fabric) │ ├── custom-functions/ # Custom functions (deprecated in favor of components) │ - ├── sql/ # SQL guide (discouraged) - │ - └── fastify-routes/ # Fastify routes (discouraged) + └── sql/ # SQL guide (discouraged) ``` ## Redirects From 95752e656072f2d9bfb2c062cdfbb27f9d17fbd1 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 19 Feb 2026 11:11:43 -0700 Subject: [PATCH 04/51] create migration context dir --- migration-context/link-placeholders/todo | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 migration-context/link-placeholders/todo diff --git a/migration-context/link-placeholders/todo b/migration-context/link-placeholders/todo new file mode 100644 index 00000000..e69de29b From 241f8cbeab330140999a045c5db6e3b4eadf08d8 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 19 Feb 2026 11:37:54 -0700 Subject: [PATCH 05/51] Configure build system for major-version-reorg branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Temporarily disable local search plugin (will re-enable later) - Set onBrokenLinks to 'warn' to allow build during migration - Add /reference redirect page to handle versioned docs structure - Create index.md files for reference directories - Update sidebars to autogenerate from directory structure - Document temporary changes in project brief for future re-enablement The site now builds successfully and is ready for migration PRs. Broken link warnings are expected as content is migrated. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- docusaurus.config.ts | 20 ++----------------- reference/{todo => index.md} | 2 +- .../version-v4/{todo => index.md} | 1 + .../version-v4-sidebars.json | 7 ++++++- sidebarsReference.ts | 5 ----- src/pages/reference/index.tsx | 13 ++++++++++++ v4-docs-project-brief.md | 11 ++++++++++ 7 files changed, 34 insertions(+), 25 deletions(-) rename reference/{todo => index.md} (79%) rename reference_versioned_docs/version-v4/{todo => index.md} (88%) create mode 100644 src/pages/reference/index.tsx diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 74078f22..eb37f5e0 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -55,7 +55,7 @@ const config: Config = { organizationName: 'HarperFast', // Usually your GitHub org/user name. projectName: 'documentation', // Usually your repo name. - onBrokenLinks: 'throw', + onBrokenLinks: 'warn', plugins: [ [ @@ -269,23 +269,7 @@ const config: Config = { // Use Algolia search in production when env vars are set, otherwise use local search ...(process.env.NODE_ENV === 'production' && process.env.ALGOLIA_APP_ID && process.env.ALGOLIA_SEARCH_KEY ? 
['@docusaurus/theme-search-algolia'] - : [ - [ - require.resolve('@easyops-cn/docusaurus-search-local'), - { - hashed: true, - language: ['en'], - indexDocs: true, - indexBlog: false, - indexPages: true, - docsRouteBasePath: routeBasePath, - highlightSearchTermsOnTargetPage: true, - searchResultLimits: 8, - searchBarPosition: 'right', - docsPluginIdForPreferredVersion: 'reference' - }, - ], - ]), + : []), '@docusaurus/theme-mermaid', ], diff --git a/reference/todo b/reference/index.md similarity index 79% rename from reference/todo rename to reference/index.md index 142fa0a3..c6e04335 100644 --- a/reference/todo +++ b/reference/index.md @@ -1,2 +1,2 @@ -future v5 docs +# future v5 docs replace with final product of /reference_versioned_docs/version-v4/ \ No newline at end of file diff --git a/reference_versioned_docs/version-v4/todo b/reference_versioned_docs/version-v4/index.md similarity index 88% rename from reference_versioned_docs/version-v4/todo rename to reference_versioned_docs/version-v4/index.md index 97322323..a61aa632 100644 --- a/reference_versioned_docs/version-v4/todo +++ b/reference_versioned_docs/version-v4/index.md @@ -1 +1,2 @@ +# v4 replace with new reference docs content \ No newline at end of file diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index a352dd07..5a3f77b6 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -1,3 +1,8 @@ { - "docsSidebar": [] + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." 
+ } + ] } diff --git a/sidebarsReference.ts b/sidebarsReference.ts index b386fdc6..eb596dbb 100644 --- a/sidebarsReference.ts +++ b/sidebarsReference.ts @@ -2,11 +2,6 @@ import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'; const sidebars: SidebarsConfig = { referenceSidebar: [ - { - type: 'doc', - id: 'index', - label: 'Reference', - }, { type: 'autogenerated', dirName: '.' } diff --git a/src/pages/reference/index.tsx b/src/pages/reference/index.tsx new file mode 100644 index 00000000..acd319d9 --- /dev/null +++ b/src/pages/reference/index.tsx @@ -0,0 +1,13 @@ +import { useEffect } from 'react'; +import { useHistory } from '@docusaurus/router'; + +export default function ReferenceRedirect() { + const history = useHistory(); + + useEffect(() => { + // Redirect to the v4 reference docs + history.replace('/reference/v4'); + }, [history]); + + return null; +} diff --git a/v4-docs-project-brief.md b/v4-docs-project-brief.md index 20d72ea7..30ace1b2 100644 --- a/v4-docs-project-brief.md +++ b/v4-docs-project-brief.md @@ -111,6 +111,17 @@ This consolidation will improve documentation maintainability, make features mor - **Rationale**: Validate quality and process before committing to full migration - **Impact**: Adds ~2-3 days upfront but reduces risk of rework +### 2026-02-19: Temporary Build Simplifications +- **Decision**: Temporarily disable local search plugin and set `onBrokenLinks: 'warn'` +- **Rationale**: Allows build to succeed during migration while reference docs are being populated +- **Impact**: Must remember to re-enable before merging to main: + - Re-enable local search plugin in `docusaurus.config.ts` themes section + - Change `onBrokenLinks` back to `'throw'` +- **Note**: prebuild.js and postbuild.js scripts are still needed and should remain: + - prebuild.js generates release-notes-data.json used by React components + - postbuild.js creates index.html files for URL flexibility (/path and /path/) + - Remove or update prebuild/postbuild 
scripts if no longer needed + --- ## Known Issues & Blockers From 021d80004f8a3b8be9d2be9faecbc33ca583e30d Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 23 Feb 2026 15:31:36 -0700 Subject: [PATCH 06/51] docs: migrate CLI section to v4 consolidated reference (#439) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: migrate CLI section to v4 consolidated reference Migrated Harper CLI documentation from versioned_docs to new reference_versioned_docs/version-v4/cli/ structure following the v4 documentation consolidation plan. Files created: - reference_versioned_docs/version-v4/cli/overview.md - reference_versioned_docs/version-v4/cli/commands.md - reference_versioned_docs/version-v4/cli/operations-api-commands.md - reference_versioned_docs/version-v4/cli/authentication.md - migration-context/link-placeholders/cli-link-placeholders.md Key Features: Version Annotations - All 72 CLI operations now include "Available Since" information with complete version coverage: - 51 operations from v4.3.0 - 11 operations from v4.4.0 (SSH and certificate operations) - 3 data operations from v4.4.9 (insert, update, upsert) - 3 status operations from v4.6.0 - Individual operations from v4.7.2 and v4.7.3 Authentication Documentation - Corrected to explain Unix domain socket authentication for local operations (automatically authenticated as superuser). Detailed environment variable and command parameter methods for remote operations with security best practices. Operations API Commands - Complete operations table with all 72 operations categorized by function, command aliases, parameter formatting guidelines, and cross-references to related sections via tip callouts. Process Management - Documented PID file location at /hdb.pid, clarified foreground/background behavior for commands, and updated dev mode features. Link Management - 12 internal CLI section links resolved using relative paths. 
~96 cross-section TODO links documented for future resolution in cli-link-placeholders.md. Content Consolidation - Merged content from v4.1 through v4.7 with inline version annotations following Node.js documentation patterns. Migration approach: - Single consolidated v4 reference (no separate v4.1-v4.7 versions) - Inline version annotations for feature availability - TODO: prefix for cross-section links pending future migrations - Comprehensive source attribution in HTML comments - Learn guide links use /learn paths - Consistent use of 'harper' command throughout 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 * fixup! docs: migrate CLI section to v4 consolidated reference * fixup! docs: migrate CLI section to v4 consolidated reference --------- Co-authored-by: Claude Sonnet 4.5 --- .../cli-link-placeholders.md | 282 +++ .../version-v4/cli/authentication.md | 231 +++ .../version-v4/cli/commands.md | 263 +++ .../version-v4/cli/operations-api-commands.md | 376 ++++ .../version-v4/cli/overview.md | 196 ++ .../version-v4-sidebars.json | 35 +- v4-docs-execution-procedure.md | 591 ------ v4-docs-implementation-plan.md | 297 +-- v4-docs-migration-map.md | 19 +- v4-feature-history-ai-gen.md | 1692 ----------------- 10 files changed, 1553 insertions(+), 2429 deletions(-) create mode 100644 migration-context/link-placeholders/cli-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/cli/authentication.md create mode 100644 reference_versioned_docs/version-v4/cli/commands.md create mode 100644 reference_versioned_docs/version-v4/cli/operations-api-commands.md create mode 100644 reference_versioned_docs/version-v4/cli/overview.md delete mode 100644 v4-docs-execution-procedure.md delete mode 100644 v4-feature-history-ai-gen.md diff --git a/migration-context/link-placeholders/cli-link-placeholders.md b/migration-context/link-placeholders/cli-link-placeholders.md new file mode 100644 index 00000000..60f8485e 
--- /dev/null +++ b/migration-context/link-placeholders/cli-link-placeholders.md @@ -0,0 +1,282 @@ +# Link Placeholders for CLI Section + +This document tracks all link placeholders in the CLI section that need to be resolved once other sections are migrated. + +## reference_versioned_docs/version-v4/cli/overview.md + +- Line 30: `[CLI Authentication](TODO:reference_versioned_docs/version-v4/cli/authentication.md "CLI authentication details")` + - Context: Linking to CLI authentication details + - Target: CLI authentication page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 45: `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md "Detailed CLI command reference")` + - Context: Linking to detailed CLI commands + - Target: CLI commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 60: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` + - Context: Linking to Operations API overview + - Target: Operations API section overview page + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 100: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API CLI commands reference")` + - Context: Linking to operations API commands via CLI + - Target: Operations API commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 126: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview")` + - Context: Linking to configuration options + - Target: Configuration section overview + - **Status**: PENDING (will be created in Configuration section migration) + +- Line 144: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API")` + - Context: Linking to full operations API reference + - 
Target: Operations API section overview page + - **Status**: PENDING (will be created in Operations API section migration) + +## reference_versioned_docs/version-v4/cli/commands.md + +- Line 9: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API CLI commands")` + - Context: Referring users to operations API commands documentation + - Target: Operations API commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 52: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview")` + - Context: Linking to configuration parameters + - Target: Configuration section overview + - **Status**: PENDING (will be created in Configuration section migration) + +- Line 125: `[Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Database compaction reference")` + - Context: Linking to database compaction details + - Target: Database section compaction page + - **Status**: PENDING (will be created in Database section migration) + +- Line 146: `[CLI Overview - Remote Operations](TODO:reference_versioned_docs/version-v4/cli/overview.md#remote-operations "Remote operations documentation")` + - Context: Linking to remote operations section in overview + - Target: CLI overview page, remote operations section + - **Status**: RESOLVED (file created in this migration) + +- Line 151: `[CLI Overview](TODO:reference_versioned_docs/version-v4/cli/overview.md "CLI overview")` + - Context: Linking back to CLI overview + - Target: CLI overview page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 152: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API commands")` + - Context: Linking to operations API commands + - Target: Operations API commands page (within CLI section) + - **Status**: RESOLVED (file created 
in this migration) + +- Line 153: `[CLI Authentication](TODO:reference_versioned_docs/version-v4/cli/authentication.md "CLI authentication")` + - Context: Linking to authentication mechanisms + - Target: CLI authentication page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 154: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration")` + - Context: Linking to configuration parameters + - Target: Configuration section overview + - **Status**: PENDING (will be created in Configuration section migration) + +- Line 155: `[Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Compaction")` + - Context: Linking to compaction details + - Target: Database section compaction page + - **Status**: PENDING (will be created in Database section migration) + +## reference_versioned_docs/version-v4/cli/operations-api-commands.md + +- Line 12: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` + - Context: Introduction paragraph + - Target: Operations API section overview page + - **Status**: PENDING (will be created in Operations API section migration) + +- Lines 38-109: 72 operations table entries with category links + - `[Database](TODO:../operations-api/database.md)` (9 operations) + - `[Data](TODO:../operations-api/data.md)` (9 operations) + - `[Security](TODO:../operations-api/security.md)` (17 operations) + - `[Clustering](TODO:../operations-api/clustering.md)` (4 operations) + - `[Components](TODO:../operations-api/components.md)` (9 operations) + - `[Configuration](TODO:../operations-api/configuration.md)` (2 operations) + - `[Authentication](TODO:../operations-api/authentication.md)` (2 operations) + - `[System](TODO:../operations-api/system.md)` (3 operations) + - `[Licensing](TODO:../operations-api/licensing.md)` (4 operations) + - `[Jobs](TODO:../operations-api/jobs.md)` (2 operations) + - 
`[Logging](TODO:../operations-api/logging.md)` (4 operations) + - `[Maintenance](TODO:../operations-api/maintenance.md)` (2 operations) + - `[Status](TODO:../operations-api/status.md)` (3 operations) + - Context: Operations table linking to operation category documentation + - Target: Operations API section category pages + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 118: `[Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Complete operations list")` + - Context: After Command Aliases section + - Target: Operations API section operations page + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 150: `[Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md "Database reference documentation")` + - Context: Tip callout in Database Operations examples + - Target: Database section overview + - **Status**: PENDING (will be created in Database section migration) + +- Line 168: `[REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST API reference")` + - Context: Tip callout in Data Operations examples + - Target: REST section overview + - **Status**: PENDING (will be created in REST section migration) + +- Line 168: `[GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md "GraphQL querying reference")` + - Context: Tip callout in Data Operations examples + - Target: GraphQL Querying section overview + - **Status**: PENDING (will be created in GraphQL Querying section migration) + +- Line 186: `[Configuration Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration reference")` + - Context: Tip callout in Configuration Operations examples + - Target: Configuration section overview + - **Status**: PENDING (will be created in Configuration section migration) + +- Line 204: `[Components 
Reference](TODO:reference_versioned_docs/version-v4/components/overview.md "Components reference")` + - Context: Tip callout in Component Operations examples + - Target: Components section overview + - **Status**: PENDING (will be created in Components section migration) + +- Line 222: `[Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md "Security reference")` + - Context: Tip callout in User and Role Operations examples + - Target: Security section overview + - **Status**: PENDING (will be created in Security section migration) + +- Line 227: `[CLI Overview - Remote Operations](./overview.md#remote-operations)` + - Context: Remote Operations section + - Target: CLI overview page, remote operations anchor + - **Status**: RESOLVED (file created in this migration) + +- Line 245: `[Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md "Applications reference")` + - Context: Remote Component Deployment section + - Target: Applications section (or Components section) + - **Status**: PENDING (will be created in future section migration) + +- Line 246: `[Deploying Harper Applications](TODO:learn_link "Deploying applications guide")` + - Context: Remote Component Deployment section + - Target: Learn guide on deploying applications + - **Status**: PENDING (external learn link) + +- Line 372: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API")` + - Context: Limitations section + - Target: Operations API section overview page + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 376: `[CLI Overview](./overview.md)` + - Context: See Also section + - Target: CLI overview page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 377: `[CLI Commands](./commands.md)` + - Context: See Also section + - Target: CLI commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + 
+- Line 378: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` + - Context: See Also section + - Target: Operations API section overview page + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 379: `[Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Operations reference")` + - Context: See Also section + - Target: Operations API section operations page + - **Status**: PENDING (will be created in Operations API section migration) + +- Line 380: `[CLI Authentication](./authentication.md)` + - Context: See Also section + - Target: CLI authentication page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +## reference_versioned_docs/version-v4/cli/authentication.md + +- Line 196: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles documentation")` + - Context: Linking to user management and permissions (Security Best Practices section) + - Target: Security section users and roles page + - **Status**: PENDING (will be created in Security section migration) + +- Line 204: `[Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md "Logging documentation")` + - Context: Linking to audit logging information (Security Best Practices section) + - Target: Logging section overview + - **Status**: PENDING (will be created in Logging section migration) + +- Line 256: `[CLI Overview](./overview.md)` + - Context: See Also section + - Target: CLI overview page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 257: `[CLI Commands](./commands.md)` + - Context: See Also section + - Target: CLI commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 258: `[Operations API Commands](./operations-api-commands.md)` + - Context: See Also section + - Target: 
Operations API commands page (within CLI section) + - **Status**: RESOLVED (file created in this migration) + +- Line 259: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md "Security overview")` + - Context: See Also section + - Target: Security section overview page + - **Status**: PENDING (will be created in Security section migration) + +- Line 260: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles")` + - Context: See Also section + - Target: Security section users and roles page + - **Status**: PENDING (will be created in Security section migration) + +## Summary + +### Resolved Links (Within CLI Section) +- 12 links to pages within the CLI section (all resolved in this migration) +- All internal CLI section links now use relative paths (e.g., `./overview.md`) + +### Pending Links (Cross-Section References) +These will be resolved in future section migrations: + +**Operations API Section** (~82 links): +- `reference_versioned_docs/version-v4/operations-api/overview.md` (5 occurrences) +- `reference_versioned_docs/version-v4/operations-api/operations.md` (2 occurrences) +- Operations table category pages (72 links): + - `../operations-api/database.md` + - `../operations-api/data.md` + - `../operations-api/security.md` + - `../operations-api/clustering.md` + - `../operations-api/components.md` + - `../operations-api/configuration.md` + - `../operations-api/authentication.md` + - `../operations-api/system.md` + - `../operations-api/licensing.md` + - `../operations-api/jobs.md` + - `../operations-api/logging.md` + - `../operations-api/maintenance.md` + - `../operations-api/status.md` + +**Configuration Section** (5 links): +- `reference_versioned_docs/version-v4/configuration/overview.md` + +**Database Section** (3 links): +- `reference_versioned_docs/version-v4/database/compaction.md` (2 occurrences) +- `reference_versioned_docs/version-v4/database/overview.md` (1 occurrence) + 
+**Security Section** (4 links): +- `reference_versioned_docs/version-v4/security/overview.md` (2 occurrences) +- `reference_versioned_docs/version-v4/security/users-and-roles.md` (2 occurrences) + +**Logging Section** (1 link): +- `reference_versioned_docs/version-v4/logging/overview.md` + +**Components Section** (1 link): +- `reference_versioned_docs/version-v4/components/overview.md` + +**REST Section** (1 link): +- `reference_versioned_docs/version-v4/rest/overview.md` + +**GraphQL Querying Section** (1 link): +- `reference_versioned_docs/version-v4/graphql-querying/overview.md` + +**Applications Section** (1 link): +- `reference_versioned_docs/version-v4/applications/overview.md` + +**Learn Guides** (1 link): +- Deploying Harper Applications guide (external learn link) + +**Total Pending Links**: ~96 diff --git a/reference_versioned_docs/version-v4/cli/authentication.md b/reference_versioned_docs/version-v4/cli/authentication.md new file mode 100644 index 00000000..1ff6f327 --- /dev/null +++ b/reference_versioned_docs/version-v4/cli/authentication.md @@ -0,0 +1,231 @@ +--- +title: CLI Authentication +--- + + + + +# CLI Authentication + +The Harper CLI handles authentication differently for local and remote operations. + +## Local Operations + +Available since: v4.1.0 + +For local operations (operations executed on the same machine where Harper is installed), the CLI communicates with Harper via Unix domain sockets instead of HTTP. Domain socket requests are automatically authenticated as the superuser, so no additional authentication parameters are required. + +**Example**: + +```bash +# No authentication needed for local operations +harper describe_database database=dev +harper get_components +harper set_configuration logging_level=info +``` + +When no `target` parameter is specified, the CLI defaults to using the local domain socket connection, providing secure, authenticated access to the local Harper instance. 
+ +## Remote Operations + +Available since: v4.1.0; expanded in: v4.3.0 + +For remote operations (operations executed on a remote Harper instance via the `target` parameter), you must provide authentication credentials. + +### Authentication Methods + +#### Method 1: Environment Variables (Recommended) + +Set the following environment variables to avoid exposing credentials in command history: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=password +``` + +Then execute remote operations without including credentials in the command: + +```bash +harper describe_database database=dev target=https://server.com:9925 +harper get_components target=https://remote-instance.example.com:9925 +``` + +**Benefits**: +- Credentials not visible in command history +- More secure for scripting +- Can be set once per session +- Supported by most CI/CD systems + +**Example Script**: + +```bash +#!/bin/bash + +# Set credentials from secure environment +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD # from secret manager + +# Execute operations +harper deploy target=https://prod-server.com:9925 replicated=true +harper restart target=https://prod-server.com:9925 replicated=true +``` + +#### Method 2: Command Parameters + +Provide credentials directly as command parameters: + +```bash +harper describe_database \ + database=dev \ + target=https://server.com:9925 \ + username=HDB_ADMIN \ + password=password +``` + +**Parameters**: +- `username=<username>` - Harper admin username +- `password=<password>` - Harper admin password + +**Cautions**: +- Credentials visible in command history +- Less secure for production environments +- Exposed in process listings +- Not recommended for scripts + +### Target Parameter + +The `target` parameter specifies the full HTTP/HTTPS URL of the remote Harper instance: + +**Format**: `target=<protocol>://<host>:<port>` + +**Examples**: + +```bash +# HTTPS on default operations API port +target=https://server.example.com:9925 + +# HTTP 
(not recommended for production) +target=http://localhost:9925 + +# Custom port +target=https://server.example.com:8080 +``` + +## Security Best Practices + +### 1. Use Environment Variables + +Always use environment variables for credentials in scripts and automation: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD +``` + +### 2. Use HTTPS + +Always use HTTPS for remote operations to encrypt credentials in transit: + +```bash +# Good +target=https://server.com:9925 + +# Bad - credentials sent in plain text +target=http://server.com:9925 +``` + +### 3. Manage Secrets Securely + +Store credentials in secure secret management systems: + +- Environment variables from secret managers (AWS Secrets Manager, HashiCorp Vault, etc.) +- CI/CD secret storage (GitHub Secrets, GitLab CI Variables, etc.) +- Operating system credential stores + +**Example with AWS Secrets Manager**: + +```bash +#!/bin/bash + +# Retrieve credentials from AWS Secrets Manager +export CLI_TARGET_USERNAME=$(aws secretsmanager get-secret-value \ + --secret-id harper-admin-user \ + --query SecretString \ + --output text) + +export CLI_TARGET_PASSWORD=$(aws secretsmanager get-secret-value \ + --secret-id harper-admin-password \ + --query SecretString \ + --output text) + +# Execute operations +harper deploy target=https://prod.example.com:9925 +``` + +### 4. Use Least Privilege + +Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles documentation") for more information. + +### 5. Rotate Credentials + +Regularly rotate credentials, especially for automated systems and CI/CD pipelines. + +### 6. Audit Access + +Monitor and audit CLI operations, especially for production environments. 
See [Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md "Logging documentation") for more information on logging. + +## Troubleshooting + +### Authentication Failures + +If you receive authentication errors: + +1. **Verify credentials are correct**: + - Check username and password + - Ensure no extra whitespace + +2. **Verify the target URL**: + - Ensure the URL is correct and reachable + - Check the port number + - Verify HTTPS/HTTP protocol + +3. **Check network connectivity**: + ```bash + curl https://server.com:9925 + ``` + +4. **Verify user permissions**: + - Ensure the user has permission to execute the operation + - Check user roles and permissions + +### Environment Variable Issues + +If environment variables aren't working: + +1. **Verify variables are set**: + ```bash + echo $CLI_TARGET_USERNAME + echo $CLI_TARGET_PASSWORD + ``` + +2. **Export variables**: + Ensure you used `export`, not just assignment: + ```bash + # Wrong - variable only available in current shell + CLI_TARGET_USERNAME=admin + + # Correct - variable available to child processes + export CLI_TARGET_USERNAME=admin + ``` + +3. 
**Check variable scope**: + - Variables must be exported before running commands + - Variables set in one terminal don't affect other terminals + +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [CLI Commands](./commands.md) - Core CLI commands +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md "Security overview") - Harper security features +- [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles") - User management diff --git a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md new file mode 100644 index 00000000..2d46a5ac --- /dev/null +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -0,0 +1,263 @@ +--- +title: CLI Commands +--- + + + + + + +# CLI Commands + +This page documents the core Harper CLI commands for managing Harper instances. For Operations API commands available through the CLI, see [Operations API Commands](./operations-api-commands.md). + +## Process Management Commands + +### `harper` + +Added in: v4.1.0 + +Run Harper in the foreground as a standard process. This is the recommended way to run Harper. 
+ +```bash +harper +``` + +When you run `harper`: + +- If Harper is not installed, it will guide you through the installation process +- Once installed, it runs Harper in the foreground as a standard process, compatible with systemd, Docker, and other process management tools + +**First-Time Installation**: + +If Harper is not installed, you can provide configuration parameters via environment variables or command line arguments: + +**Using Environment Variables**: + +```bash +# Minimum required parameters for no additional CLI prompts +export TC_AGREEMENT=yes +export HDB_ADMIN_USERNAME=HDB_ADMIN +export HDB_ADMIN_PASSWORD=password +export ROOTPATH=/hdb/ +harper +``` + +:::note +If you specify `DEFAULT_MODE=dev` you will also need to specify the `REPLICATION_HOSTNAME=localhost` +::: + +**Using Command Line Arguments**: + +```bash +# Minimum required parameters for no additional CLI prompts +harper \ + --TC_AGREEMENT=yes \ + --HDB_ADMIN_USERNAME=HDB_ADMIN \ + --HDB_ADMIN_PASSWORD=password \ + --ROOTPATH='/hdb' +``` + +**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview") for a full list of configuration parameters. + +:::info +For more information on installation, see [Getting Started / Install and Connect Harper](/learn/getting-started/install-and-connect-harper). +::: + +### `harper run` + +Added in: v4.2.0 (confirmed via release notes) + +Run a Harper application from any location as a foreground, standard process (similar to `harper`). + +```bash +harper run /path/to/app +``` + +This command runs Harper with the specified application directory without automatic reloading or dev-specific features. + +### `harper dev` + +Added in: v4.2.0 (confirmed via release notes) + +Run Harper in development mode from a specified directory with automatic reloading. Recommended for local application development. 
Operates similar to `harper` and `harper run`. + +```bash +harper dev /path/to/app +``` + +**Features**: +- Pushes logs to standard streams automatically +- Uses a single thread for simpler debugging +- Auto-restart on file changes + +### `harper restart` + +Available since: v4.1.0 + +Restart a running Harper instance regardless if its a foreground (`harper`, `harper run`, or `harper dev`) or background (`harper start`) process. + +```bash +harper restart +``` + +### `harper start` + +Available since: v4.1.0 + +Start Harper in background (daemon mode). + +```bash +harper start +``` + +After installation, this command launches Harper as a background process. Remember that the Harper PID is available in a `hdb.pid` file within the installation directory. + +### `harper stop` + +Available since: v4.1.0 + +Stop a running Harper instance. + +```bash +harper stop +``` + +## Installation Commands + +### `harper install` + +Available since: v4.1.0 + +Install Harper with interactive prompts or automated configuration. + +```bash +harper install +``` + +The `harper install` command operates exactly like the [`harper`](#harper) command, but exits as soon as the installation completes. See the [`harper`](#harper) command documentation above for details on providing configuration parameters via environment variables or command line arguments. + +**Note**: We recommend using `harper` instead of `harper install` as it provides a consistent workflow for both installation and running Harper. + +## Information Commands + +### `harper version` + +Available since: v4.1.0 + +Display the installed Harper version. + +```bash +harper version +``` + +**Example Output**: +``` +4.7.0 +``` + +### `harper status` + +Available since: v4.1.0 + +Display the status of Harper and clustering. 
+ +```bash +harper status +``` + +Shows: +- Harper process status +- Clustering network status +- Replication statuses + +In Harper versions where NATS is supported, this command also shows the clustering hub and leaf processes too. + +### `harper help` + +Available since: v4.1.0 + +Display all available Harper CLI commands with brief descriptions. + +```bash +harper help +``` + +## Maintenance Commands + +### `harper renew-certs` + +Available since: v4.1.0 + +Renew Harper-generated self-signed certificates. + +```bash +harper renew-certs +``` + +This command regenerates the self-signed SSL/TLS certificates used by Harper. + +### `harper copy-db` + +Available since: v4.1.0 + +Copy a Harper database with compaction to eliminate free-space and fragmentation. + +```bash +harper copy-db +``` + +**Parameters**: +- `` - Name of the source database +- `` - Full path to the target database file + +**Example**: + +```bash +harper copy-db data /home/user/hdb/database/copy.mdb +``` + +This copies the default `data` database to a new location with compaction applied. + +**Use Cases**: +- Database optimization +- Eliminating fragmentation +- Creating compacted backups +- Reclaiming free space + +See also: [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Database compaction reference") for more information. + +#### How Backups Work + +Harper uses a transactional commit process that ensures data on disk is always transactionally consistent with storage. This means Harper maintains database integrity in the event of a crash and allows you to use standard volume snapshot tools to make backups. + +**Backup Process**: + +Database files are stored in the `hdb/database` directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down), and database integrity will be preserved. 
+ +**Important Notes**: + +- **Atomic Snapshots**: Use volume snapshot tools that create atomic snapshots +- **Not Safe**: Simply copying an in-use database file using `cp` is **not reliable** + - Progressive reads occur at different points in time + - Results in an unreliable copy that likely won't be usable +- **Safe Copying**: Standard file copying is only reliable for database files that are **not in use** + +**Recommended Backup Tools**: +- LVM snapshots +- ZFS snapshots +- BTRFS snapshots +- Cloud provider volume snapshots (AWS EBS, Azure Disk, GCP Persistent Disk) +- Enterprise backup solutions with snapshot capabilities + +## Remote Operations + +The CLI supports executing commands on remote Harper instances. For details, see [CLI Overview - Remote Operations](./overview.md#remote-operations). + +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [CLI Authentication](./authentication.md) - Authentication mechanisms +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration") - Configuration parameters for installation +- [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Compaction") - More on database compaction diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md new file mode 100644 index 00000000..faf217d2 --- /dev/null +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -0,0 +1,376 @@ +--- +title: Operations API Commands +--- + + + + +# Operations API Commands + +Added in: v4.3.0 (confirmed via release notes) + +The Harper CLI supports executing operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") directly from the command line. 
This enables powerful automation and scripting capabilities. + +## General Syntax + +```bash +harper = +``` + +**Output Format**: +- Default: YAML +- JSON: Pass `json=true` as a parameter + +## Supported Operations + + + +The following operations are available through the CLI. Operations that require complex nested parameters or object structures are not supported via CLI and must be executed through the HTTP API. + +### Complete Operations List + +:::note +This is just a brief overview of all operations available as CLI commands. Review the respective operation documentation for more information on available arguments and expected behavior. Keep in mind that all operations options are converted to CLI arguments in the same way (using `snake_case`). +::: + +| Operation | Description | Category | Available Since | +|-----------|-------------|----------|-----------------| +| `describe_table` | Describe table structure and metadata | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `describe_all` | Describe all databases and tables | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `describe_database` | Describe database structure | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_database` | Create a new database | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_database` | Delete a database | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_table` | Create a new table | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_table` | Delete a table | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_attribute` | Create a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_attribute` | Delete a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `search_by_id` | Search records by ID | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `search_by_value` | Search records by attribute value | 
[Data](TODO:../operations-api/data.md) | v4.3.0 | +| `insert` | Insert new records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `update` | Update existing records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `upsert` | Insert or update records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `delete` | Delete records | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `sql` | Execute SQL queries | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `csv_file_load` | Load data from CSV file | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `csv_url_load` | Load data from CSV URL | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `list_users` | List all users | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `add_user` | Create a new user | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `alter_user` | Modify user properties | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `drop_user` | Delete a user | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `list_roles` | List all roles | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `drop_role` | Delete a role | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `create_csr` | Create certificate signing request | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `sign_certificate` | Sign a certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `list_certificates` | List SSL/TLS certificates | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `add_certificate` | Add SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `remove_certificate` | Remove SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `add_ssh_key` | Add SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `get_ssh_key` | Get SSH key | [Security](TODO:../operations-api/security.md) | v4.7.2 | +| `update_ssh_key` | Update SSH 
key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `delete_ssh_key` | Delete SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `list_ssh_keys` | List all SSH keys | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `set_ssh_known_hosts` | Set SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `get_ssh_known_hosts` | Get SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `cluster_get_routes` | Get cluster routing information | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `cluster_network` | Get cluster network status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `cluster_status` | Get cluster status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `remove_node` | Remove node from cluster | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `add_component` | Add a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `deploy_component` | Deploy a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `deploy` (alias) | Alias for `deploy_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `package_component` | Package a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `package` (alias) | Alias for `package_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `drop_component` | Remove a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `get_components` | List all components | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `get_component_file` | Get component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `set_component_file` | Set component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `install_node_modules` | Install Node.js dependencies | 
[Components](TODO:../operations-api/components.md) | v4.3.0 | +| `set_configuration` | Update configuration settings | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | +| `get_configuration` | Get current configuration | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | +| `create_authentication_tokens` | Create authentication tokens | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | +| `refresh_operation_token` | Refresh operation token | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | +| `restart_service` | Restart Harper service | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `restart` | Restart Harper instance | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `system_information` | Get system information | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `registration_info` | Get registration information | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `get_fingerprint` | Get instance fingerprint | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `set_license` | Set license key | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `get_usage_licenses` | Get usage and license info | [Licensing](TODO:../operations-api/licensing.md) | v4.7.3 | +| `get_job` | Get job status | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | +| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | +| `read_log` | Read application logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `read_transaction_log` | Read transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `read_audit_log` | Read audit logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `delete_transaction_logs_before` | Delete old transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `purge_stream` | Purge streaming data | 
[Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | +| `delete_records_before` | Delete old records | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | +| `get_status` | Get custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| `set_status` | Set custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| `clear_status` | Clear custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | + +### Command Aliases + +The following aliases are available for convenience: + +- `deploy` → `deploy_component` +- `package` → `package_component` + +For detailed parameter information for each operation, see the [Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Complete operations list"). + +## Command Examples + +### Database Operations + +**Describe a database**: + +```bash +harper describe_database database=dev +``` + +**Describe a table** (with YAML output): + +```bash +harper describe_table database=dev table=dog +``` + +**Example Output**: +```yaml +schema: dev +name: dog +hash_attribute: id +audit: true +schema_defined: false +attributes: + - attribute: id + is_primary_key: true + - attribute: name + indexed: true +clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b +record_count: 10 +last_updated_record: 1724483231970.9949 +``` + +:::tip +For detailed information on database and table structures, see the [Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md "Database reference documentation"). 
+::: + +### Data Operations + +**Search by ID** (with JSON output): + +```bash +harper search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true +``` + +**Search by value**: + +```bash +harper search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]' +``` + +:::tip +For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST API reference") and [GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md "GraphQL querying reference"). +::: + +### Configuration Operations + +**Set configuration**: + +```bash +harper set_configuration logging_level=error +``` + +**Get configuration**: + +```bash +harper get_configuration +``` + +:::tip +For comprehensive configuration options, see the [Configuration Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration reference"). +::: + +### Component Operations + +**Deploy a component**: + +```bash +harper deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template +``` + +**Get all components**: + +```bash +harper get_components +``` + +**Note**: `deploy` is an alias for `deploy_component`: + +```bash +harper deploy project=my-app package=https://github.com/user/repo +``` + +:::tip +For more information on components and applications, see the [Components Reference](TODO:reference_versioned_docs/version-v4/components/overview.md "Components reference"). +::: + +### User and Role Operations + +**List users**: + +```bash +harper list_users +``` + +**List roles**: + +```bash +harper list_roles +``` + +:::tip +For detailed information on users, roles, and authentication, see the [Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md "Security reference"). +::: + +## Remote Operations + +All CLI operations can be executed on remote Harper instances. 
See [CLI Overview - Remote Operations](./overview.md#remote-operations) for details on authentication and remote execution. + +### Remote Component Deployment + +When using remote operations, you can deploy a local component or application to the remote instance. + +**Deploy current directory**: + +If you omit the `package` parameter, the current directory will be packaged and deployed: + +```bash +harper deploy target=https://server.com:9925 +``` + +**Note**: `deploy` is an alias for `deploy_component`. + +**Deploy to clustered environment**: + +For clustered environments, use the `replicated=true` parameter to ensure the deployment is replicated to all nodes: + +```bash +harper deploy target=https://server.com:9925 replicated=true +``` + +**Restart after deployment** (with replication): + +After deploying to a clustered environment, restart all nodes to apply changes: + +```bash +harper restart target=https://server.com:9925 replicated=true +``` + +For more information on Harper applications and components, see: +- [Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md "Applications reference") - Application architecture and structure +- [Deploying Harper Applications](TODO:learn_link "Deploying applications guide") - Step-by-step deployment guide + +## Parameter Formatting + +### String Parameters + +Simple string values can be passed directly: + +```bash +harper describe_table database=dev table=dog +``` + +### Array Parameters + +Array parameters must be quoted and formatted as JSON: + +```bash +harper search_by_id database=dev table=dog ids='["1","2","3"]' +``` + +### Object Parameters + +Object parameters are not supported via CLI. 
For operations requiring complex nested objects, use: +- The [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") via HTTP +- A custom script or tool + +### Boolean Parameters + +Boolean values can be passed as strings: + +```bash +harper get_configuration json=true +harper deploy target=https://server.com:9925 replicated=true +``` + +## Output Formatting + +### YAML (Default) + +By default, CLI operation results are formatted as YAML for readability: + +```bash +harper describe_table database=dev table=dog +``` + +### JSON + +Pass `json=true` to get JSON output (useful for scripting): + +```bash +harper describe_table database=dev table=dog json=true +``` + +## Scripting and Automation + +The Operations API commands through the CLI are ideal for: + +- Build and deployment scripts +- Automation workflows +- CI/CD pipelines +- Administrative tasks +- Monitoring and health checks + +**Example Script**: + +```bash +#!/bin/bash + +# Deploy component to remote cluster +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD + +harper deploy \ + target=https://cluster-node-1.example.com:9925 \ + replicated=true \ + package=https://github.com/myorg/my-component + +# Restart the cluster +harper restart \ + target=https://cluster-node-1.example.com:9925 \ + replicated=true + +# Check status +harper get_components \ + target=https://cluster-node-1.example.com:9925 \ + json=true +``` + +## Limitations + +The following operation types are **not supported** via CLI: + +- Operations requiring complex nested JSON structures +- Operations with array-of-objects parameters +- File upload operations +- Streaming operations + +For these operations, use the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") directly via HTTP. 
+ +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [CLI Commands](./commands.md) - Core CLI commands +- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") - Operations API documentation +- [Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Operations reference") - Complete operations list +- [CLI Authentication](./authentication.md) - Authentication details diff --git a/reference_versioned_docs/version-v4/cli/overview.md b/reference_versioned_docs/version-v4/cli/overview.md new file mode 100644 index 00000000..62978525 --- /dev/null +++ b/reference_versioned_docs/version-v4/cli/overview.md @@ -0,0 +1,196 @@ +--- +title: Harper CLI Overview +--- + + + + + + + +# Harper CLI Overview + +The Harper command line interface (CLI) is used to administer self-installed Harper instances. + +## Installation + +Available since: v4.1.0 + +Harper is typically installed globally via npm: + +```bash +npm i -g harperdb +``` + +The installation includes the Harper CLI, which provides comprehensive management capabilities for local and remote Harper instances. + +For detailed installation instructions, see the [Getting Started / Install And Connect Harper](https://docs.harperdb.io/docs/getting-started/install-and-connect-harper) guide. + +## Command Name + +Changed in: v4.7.0 + +The CLI command is `harper`. From v4.1.0 to v4.6.x, the command was only available as `harperdb`. Starting in v4.7.0, the preferred command is `harper`, though `harperdb` continues to work as an alias for backward compatibility. + +**Examples**: +```bash +# Modern usage (v4.7.0+) +harper +harper describe_table database=dev table=dog + +# Legacy usage (still supported) +harperdb +harperdb describe_table database=dev table=dog +``` + +All examples in this documentation use `harper`. + +## General Usage + +The primary way to use Harper is to run the `harper` command. 
When you run `harper`: + +- If Harper is not installed, it will guide you through the installation process +- Once installed, it runs Harper in the foreground as a standard process +- This makes it compatible with systemd, Docker, and other process management tools +- Output logs directly to the console for easy monitoring + +The CLI supports two main categories of commands: + +1. **System Commands** - Core Harper management commands (start, stop, restart, status, etc.) +2. **Operations API Commands** - Execute operations from the Operations API directly via the CLI + +Both system and operations commands can be executed on local or remote Harper instances. For remote operations, authentication credentials can be provided via command parameters or environment variables. + +### CLI Installation Targeting + +By default, the CLI targets the Harper installation path stored in `~/.harperdb/hdb_boot_properties.file`. You can override this to target a specific Harper installation by specifying the `--ROOTPATH` command line argument or the `ROOTPATH` environment variable. + +**Example: Target a specific installation**: + +```bash +# Using command line argument +harper status --ROOTPATH /custom/path/to/hdb + +# Using environment variable +export ROOTPATH=/custom/path/to/hdb +harper status +``` + +### Process ID File + +When Harper is running, the process identifier (PID) is stored in a file named `hdb.pid` located in the Harper installation directory. This file can be used by external process management tools or scripts to monitor or manage the Harper process. 
+ +**Location**: `/hdb.pid` + +**Example**: +```bash +# Read the PID +cat /path/to/hdb/hdb.pid + +# Use with external tools +kill -0 $(cat /path/to/hdb/hdb.pid) # Check if process is running +``` + +## System Management Commands + +| Command | Description | Available Since | +|---------|-------------|-----------------| +| `harper` | Run Harper in foreground mode (default behavior) | v4.1.0 | +| `harper run ` | Run Harper application from any directory | v4.2.0 | +| `harper dev ` | Run Harper in dev mode with auto-restart and console logging | v4.2.0 | +| `harper restart` | Restart Harper | v4.1.0 | +| `harper start` | Start Harper in background (daemon mode) | v4.1.0 | +| `harper stop` | Stop a running Harper instance | v4.1.0 | +| `harper status` | Display Harper and clustering status | v4.1.0 | +| `harper version` | Show installed Harper version | v4.1.0 | +| `harper renew-certs` | Renew Harper-generated self-signed certificates | v4.1.0 | +| `harper copy-db ` | Copy a database with compaction | v4.1.0 | +| `harper help` | Display all available CLI commands | v4.1.0 | + +See [CLI Commands](./commands.md) for detailed documentation on each command. + +## Operations API Commands + +Added in: v4.3.0 (confirmed via release notes) + +The Harper CLI supports executing most operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") directly from the command line. This includes operations that do not require complex nested parameters. + +**Syntax**: `harper =` + +**Output Format**: Results are formatted as YAML by default. Pass `json=true` for JSON output. 
+ +**Examples**: + +```bash +# Describe a table +harper describe_table database=dev table=dog + +# Set configuration +harper set_configuration logging_level=error + +# Deploy a component +harper deploy_component project=my-app package=https://github.com/user/repo + +# Get all components +harper get_components + +# Search by ID (JSON output) +harper search_by_id database=dev table=dog ids='["1"]' json=true + +# SQL query +harper sql sql='select * from dev.dog where id="1"' +``` + +See [Operations API Commands](./operations-api-commands.md) for the complete list of available operations. + +## Remote Operations + +Changed in: v4.3.0 (expanded remote operations support) + +The CLI can execute operations on remote Harper instances by passing the `target` parameter with the HTTP address of the remote instance. + +**Authentication**: Provide credentials via: +- Parameters: `username= password=` +- Environment variables: `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` + +See [CLI Authentication](./authentication.md) for detailed information on authentication methods and best practices. + +**Example: CLI Target Environment Variables**: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=password +harper describe_database database=dev target=https://server.com:9925 +``` + +**Example: CLI Options**: + +```bash +harper describe_database database=dev target=https://server.com:9925 username=HDB_ADMIN password=password +``` + +## Development Mode + +Added in: v4.2.0 (confirmed via release notes) + +For local application and component development, use `harper dev`: + +```bash +harper dev /path/to/app +``` + +**Features**: +- Console logging for immediate feedback +- Debugging enabled +- Auto-restart on file changes +- Ideal for rapid iteration during development + +See [CLI Commands](./commands.md) for detailed information on `harper dev` and other development commands. 
+ +## See Also + +- [CLI Commands](./commands.md) - Detailed reference for each CLI command +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [CLI Authentication](./authentication.md) - Authentication mechanisms +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview") - Harper configuration options +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") - Full operations API reference diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 5a3f77b6..6229878b 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -1,8 +1,37 @@ { - "docsSidebar": [ + "referenceSidebar": [ { - "type": "autogenerated", - "dirName": "." + "type": "doc", + "id": "index", + "label": "Welcome" + }, + { + "type": "category", + "label": "CLI", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "cli/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "cli/commands", + "label": "Commands" + }, + { + "type": "doc", + "id": "cli/operations-api-commands", + "label": "Operations API Commands" + }, + { + "type": "doc", + "id": "cli/authentication", + "label": "Authentication" + } + ] } ] } diff --git a/v4-docs-execution-procedure.md b/v4-docs-execution-procedure.md deleted file mode 100644 index 25181c1e..00000000 --- a/v4-docs-execution-procedure.md +++ /dev/null @@ -1,591 +0,0 @@ -# Harper v4 Documentation Migration - Execution Procedure - -This document outlines the practical execution approach for implementing the v4 documentation migration defined in [v4-docs-implementation-plan.md](./v4-docs-implementation-plan.md). 
- -## Overview - -**Goal**: Use AI agents (Claude Code) to generate initial documentation drafts, then have humans review and refine. - -**Approach**: Start with a pilot section to validate the process, then scale to remaining sections. - -**Timeline**: Estimated 3-4 weeks total (1-2 days AI generation + 2-3 weeks human review) - ---- - -## Execution Strategy - -### Recommended Approach: VSCode Extension with Pilot - -We'll use Claude Code in VSCode to orchestrate the migration. This gives us: -- ✅ Full visibility and control over what's being generated -- ✅ Ability to course-correct between sections -- ✅ Easy local testing before pushing -- ✅ Familiar development workflow - -**Alternative considered**: Fully automated Agent SDK script. While this could generate all 20 PRs at once, we prefer the hybrid approach to validate quality first and maintain control. - -### Three-Phase Approach - -**Phase 1: Pilot (2 sections)** -- Run CLI section migration to test the process -- Review output quality and completeness -- Run Security section as validation -- Refine prompts and procedures based on learnings - -**Phase 2: Scale (Remaining 18 sections)** -- Continue with VSCode approach for remaining sections -- Run 2-3 sections in parallel (multiple VSCode windows) -- Option to build Agent SDK automation if VSCode becomes tedious - -**Phase 3: Finalization** -- Resolve link placeholders -- Update cross-references -- Configure sidebars and redirects -- Final cleanup - ---- - -## Prerequisites - -### Environment Setup - -```bash -# Ensure on correct branch -git checkout major-version-reorg -git pull - -# Create directory for tracking metadata -mkdir -p migration-context/link-placeholders - -# Verify gh CLI is authenticated -gh auth status - -# Verify VSCode with Claude Code extension is installed and configured -``` - -### Required Files (Already Created) - -- ✅ `v4-docs-implementation-plan.md` - Detailed implementation instructions -- ✅ `v4-docs-migration-map.md` - Mapping of 
old → new paths -- ✅ `v4-docs-reference-plan.md` - Target structure and philosophy -- ✅ `v4-docs-research.md` - Manual research notes -- ✅ `v4-feature-history-ai-gen.md` - AI-generated feature history - ---- - -## Phase 1: Pilot Execution - -### Pilot Section 1: CLI - -**Why CLI first?** -- Relatively stable across versions -- Simple structure (4 files) -- Good test of the entire workflow -- Low risk if something goes wrong - -**Steps:** - -1. **Start Claude Code in VSCode** - - Open VSCode in the documentation repository - - Start a new Claude Code chat - -2. **Provide the prompt:** - ``` - I need you to migrate the CLI section following the implementation plan. - - Context files to read: - - v4-docs-implementation-plan.md (Part 1: Initial Content Generation) - - v4-docs-migration-map.md (CLI Section) - - v4-docs-reference-plan.md (overall structure) - - Task: - 1. Read the CLI section entry from the migration map - 2. Read all source files listed (versioned_docs/version-4.7/deployments/harper-cli.md, etc.) - 3. Read release notes for version annotations - 4. Generate new files in reference_versioned_docs/version-v4/cli/ with: - - Inline source comments - - Version annotations with confidence levels - - Link placeholders for cross-references - 5. Create migration-context/link-placeholders/cli-link-placeholders.md - 6. Create branch: migration/cli - 7. Commit changes - 8. Open PR using the template from implementation plan - 9. Update v4-docs-migration-map.md status to "In Progress" - - Follow all agent instructions from Part 1 of the implementation plan. - ``` - -3. **Monitor the process:** - - Watch as Claude Code reads files and generates content - - Review generated files as they're created - - Check that inline source comments are present - - Verify branch and commit are created - -4. **Review the PR:** - - Check PR description follows template - - Verify all required sections are filled out - - Note quality of content, version annotations, placeholders - -5. 
**Document findings:** - - What worked well? - - What needs improvement? - - Any prompt refinements needed? - -### Pilot Section 2: Security - -**Why Security second?** -- More complex than CLI (8 files) -- Tests handling of cross-cutting concerns -- Validates the process scales beyond simple sections - -**Steps:** - -1. **Refine prompt based on CLI learnings** -2. **Run same process** with Security section -3. **Compare results** - is quality consistent? -4. **Decide on scaling approach:** - - If both pilots successful → continue with VSCode - - If quality issues → refine prompts, try again - - If tedious/repetitive → consider Agent SDK automation - ---- - -## Phase 2: Scale Execution - -### Batch Processing - -Organize remaining 18 sections into batches based on the implementation plan: - -**Batch 1: Simple sections (3 sections)** -- Content Types -- Headers -- GraphQL Querying - -**Batch 2: Medium complexity (7 sections)** -- Environment Variables -- Static Files -- HTTP -- MQTT -- Logging -- Analytics -- Studio - -**Batch 3: Complex sections (5 sections)** -- REST -- Replication -- Database -- Resources -- Components - -**Batch 4: Cross-cutting (2 sections)** -- Operations API -- Configuration - -**Batch 5: Legacy (1 section)** -- Legacy content - -### Parallel Execution - -**Option A: Sequential** -- Run one section at a time -- Safest approach -- Slower but easier to manage - -**Option B: Parallel (Recommended)** -- Open 2-3 VSCode windows -- Run 2-3 sections simultaneously -- Faster while maintaining control -- Can handle ~5 sections per day - -**Option C: Automated** -- Build Agent SDK script after successful pilots -- Generate all remaining PRs at once -- Fastest but less control - -### Prompt Template - -For each section, use this template (customize [PLACEHOLDERS]): - -``` -Migrate the [SECTION] section following the implementation plan. 
- -Context files: -- v4-docs-implementation-plan.md (Part 1 instructions) -- v4-docs-migration-map.md ([SECTION] Section entry) -- v4-docs-reference-plan.md (structure reference) - -Key details for this section: -- Output directory: reference_versioned_docs/version-v4/[section]/ -- Primary source: [PRIMARY_SOURCE_PATH from migration map] -- Additional sources: [LIST from migration map] -- Link placeholder tracker: migration-context/link-placeholders/[section]-link-placeholders.md - -Task: -1. Read the [SECTION] section entry from migration map -2. Read all source files -3. Read relevant release notes -4. Generate new reference files following the structure -5. Include inline source comments for traceability -6. Add version annotations with confidence levels -7. Use link placeholders for cross-references -8. Create link placeholder tracker -9. Create branch: migration/[section] -10. Commit with message: "docs: migrate [section] to v4 consolidated reference" -11. Open PR using the template -12. Update migration map status to "In Progress" - -Follow all Part 1 agent instructions carefully. -``` - ---- - -## Phase 3: Human Review Process - -### For Each PR - -**Review Checklist:** - -1. **Content Quality** - - [ ] Is the content accurate and complete? - - [ ] Does it make sense to a reader? - - [ ] Are examples clear and correct? - -2. **Version Annotations** - - [ ] Are version annotations present where appropriate? - - [ ] Do they match release notes/version comparisons? - - [ ] Are confidence levels noted (verified vs. inferred)? - -3. **Source Documentation** - - [ ] Are inline source comments present? - - [ ] Can we trace content back to original sources? - - [ ] Is the PR description complete? - -4. **Link Placeholders** - - [ ] Are placeholders in the correct format? - - [ ] Is the link tracker file created? - - [ ] Do placeholders make sense for targets? - -5. **Structure** - - [ ] Files in correct location (reference_versioned_docs/version-v4/)? 
- - [ ] Follows the structure from reference plan? - - [ ] No removal of versioned_docs content? - -### Review Workflow - -1. Reviewer assigned to PR -2. Reviewer goes through checklist -3. Reviewer edits content directly in PR (or requests changes) -4. Reviewer resolves any "needs verification" annotations -5. Reviewer handles image decisions (if any) -6. Reviewer approves and merges -7. Reviewer updates migration-map.md status to "Complete" -8. Reviewer checks off tracking issue - -### Review Velocity - -- Target: 2-3 PRs reviewed per day -- Simple sections: 30-60 minutes each -- Complex sections: 2-4 hours each -- Total review time: ~2-3 weeks - ---- - -## Phase 4: Post-Generation Cleanup - -After all sections are merged, run cleanup phases from implementation plan. - -### 4.1: Link Resolution - -**Using Claude Code:** - -``` -Resolve link placeholders following Part 3 of the implementation plan. - -Context: -- All migration-context/link-placeholders/*.md files -- All reference_versioned_docs/version-v4/ files - -Task: -1. Read all placeholder tracker files -2. Scan reference_versioned_docs/version-v4/ to see what exists -3. For each placeholder, replace TODO:path with correct relative path -4. Create PR(s) for link resolution (one per section recommended) -5. Flag any unresolvable links for human review - -Follow Part 3 instructions from implementation plan. -``` - -### 4.2: Cross-Reference Updates - -Update links in release_notes/ and learn/ content: - -``` -Update cross-references following Part 4 of the implementation plan. - -Task: -1. Scan release_notes/ for old documentation paths -2. Map to new paths using migration map -3. Update links -4. Create PR - -Do the same for learn/ content. -``` - -### 4.3: Sidebar Configuration - -``` -Create Docusaurus sidebar configuration following Part 5 of the implementation plan. - -Task: -1. Read reference plan outline for hierarchy -2. Scan reference_versioned_docs/version-v4/ for actual files -3. 
Generate sidebar JSON/JS following Docusaurus conventions -4. Ensure non-collapsible sections as noted in plan -5. Create PR -``` - -### 4.4: Redirects - -``` -Configure redirects following Part 6 of the implementation plan. - -Task: -1. Analyze existing redirects.ts -2. Use migration map to determine new paths -3. Generate redirect rules (prioritize most-visited pages) -4. Create PR -``` - -### 4.5: Final Cleanup - -**Human tasks:** -1. Review orphaned content (files not in migration map) -2. Remove old versioned_docs/version-4.X/ folders -3. Build docs locally and validate -4. Test redirects -5. Final spot-checks - ---- - -## Progress Tracking - -### GitHub Tracking Issue - -Create an issue titled "v4 Documentation Migration Progress Tracker" with this body: - -```markdown -Tracking migration of v4 documentation to consolidated structure. - -## Phase 1: Pilots -- [ ] #[PR] CLI (Pilot 1) -- [ ] #[PR] Security (Pilot 2) - -## Phase 2: Batch 1 - Simple -- [ ] #[PR] Content Types -- [ ] #[PR] Headers -- [ ] #[PR] GraphQL Querying - -## Phase 2: Batch 2 - Medium -- [ ] #[PR] Environment Variables -- [ ] #[PR] Static Files -- [ ] #[PR] HTTP -- [ ] #[PR] MQTT -- [ ] #[PR] Logging -- [ ] #[PR] Analytics -- [ ] #[PR] Studio - -## Phase 2: Batch 3 - Complex -- [ ] #[PR] REST -- [ ] #[PR] Replication -- [ ] #[PR] Database -- [ ] #[PR] Resources -- [ ] #[PR] Components - -## Phase 2: Batch 4 - Cross-cutting -- [ ] #[PR] Operations API -- [ ] #[PR] Configuration - -## Phase 2: Batch 5 - Legacy -- [ ] #[PR] Legacy Content - -## Phase 3: Cleanup -- [ ] Link resolution -- [ ] Cross-references updated -- [ ] Sidebars configured -- [ ] Redirects configured -- [ ] Old content removed - -## Phase 4: Finalization -- [ ] Final validation complete -- [ ] Merged to main -``` - -### Migration Map Status - -Update `v4-docs-migration-map.md` status field for each section: -- "In Progress" when PR is opened -- "Complete" when PR is merged - ---- - -## Team Roles - -### AI Agent (Claude 
Code) -- Generate initial content drafts -- Follow migration map and implementation plan -- Create branches, commits, PRs -- Track placeholders and sources - -### Human Reviewers -- Verify content accuracy -- Validate version annotations -- Edit and improve content -- Make final decisions on uncertainties -- Merge PRs - -### Project Lead -- Coordinate the migration -- Assign reviewers to PRs -- Monitor progress via tracking issue -- Make decisions on edge cases - ---- - -## Communication Plan - -### Kickoff Meeting -- Present this plan to the team -- Walk through pilot sections -- Assign initial reviewers -- Set expectations for review velocity - -### Weekly Syncs -- Review progress on tracking issue -- Discuss any blockers or issues -- Adjust approach if needed -- Assign upcoming reviews - -### Ad-hoc Communication -- Slack/Discord for quick questions -- PR comments for content-specific discussions -- Document any process improvements - ---- - -## Success Metrics - -- [ ] All 20 sections have PRs opened -- [ ] All PRs pass initial quality review -- [ ] 95%+ of version annotations verified -- [ ] All link placeholders resolved -- [ ] Documentation builds without errors -- [ ] Old versioned_docs removed -- [ ] Successfully merged to main - ---- - -## Risk Mitigation - -### Risk: AI generates incorrect content -**Mitigation**: -- Pilot sections first to validate quality -- Inline source documentation for traceability -- Human review on every PR -- Can always reference original sources - -### Risk: Process takes longer than expected -**Mitigation**: -- Flexible timeline (3-4 weeks is estimate) -- Can parallelize more aggressively if needed -- Can pause and adjust if blockers arise - -### Risk: Link placeholders are confusing -**Mitigation**: -- Clear format defined upfront -- Section-specific tracker files -- Separate cleanup phase dedicated to resolving them - -### Risk: Team capacity for reviews -**Mitigation**: -- Can adjust review velocity -- Can spread reviews 
over longer period -- Simple sections are quick to review - ---- - -## Decision Points - -### After Pilot Phase -**Decision**: Continue with VSCode or build Agent SDK automation? -- **If** pilots are successful and quality is good → Continue with VSCode -- **If** VSCode becomes tedious → Build Agent SDK script -- **If** quality issues → Refine prompts and retry - -### During Scale Phase -**Decision**: Sequential or parallel execution? -- **If** team has capacity → Run 2-3 sections in parallel -- **If** reviewers are overwhelmed → Slow down to sequential -- **If** going well → Scale up to more parallelization - -### Before Final Cleanup -**Decision**: Ready to remove old content? -- **If** all content migrated and verified → Proceed with removal -- **If** any uncertainties remain → Pause and resolve -- **If** redirects not ready → Complete redirects first - ---- - -## Next Steps - -1. **Review this plan with team** - Get feedback and buy-in -2. **Set up environment** - Ensure VSCode, Claude Code, gh CLI ready -3. **Create tracking issue** - Set up progress tracking -4. **Run pilot 1 (CLI)** - Execute and evaluate -5. **Team sync** - Review pilot results and decide on scaling approach -6. **Scale execution** - Continue with remaining sections -7. **Complete cleanup** - Final phases and merge to main - ---- - -## Questions for Team Discussion - -1. Who will be responsible for reviewing PRs? (Assign per section or per batch?) -2. What's our target review velocity? (How many PRs per day can we handle?) -3. Should we run pilots first, or are we confident enough to start scaling immediately? -4. Do we want to build Agent SDK automation, or stick with VSCode throughout? -5. Any concerns about the 3-4 week timeline? -6. Any sections that need special attention or domain expertise? 
- ---- - -## Appendix: Troubleshooting - -### If Claude Code goes off track -- Stop the generation -- Review what it's done so far -- Refine the prompt with more specific guidance -- Restart with the refined prompt - -### If content quality is poor -- Check that Claude Code read the right source files -- Verify inline source comments are present -- Look at similar PRs to see if it's a pattern -- Refine the prompt to be more specific about quality expectations - -### If link placeholders are confusing -- Review the placeholder tracker file -- Check the format matches the specification -- Update the placeholder with more context/description -- Flag for human reviewer to fix in the PR - -### If version annotations are wrong -- Check release notes to verify -- Look at git history of source files -- Mark as "needs verification" and have human research -- Update in the review process - -### If merge conflicts occur -- Should be rare since we're only adding files -- If they happen, likely in migration-map.md or tracking files -- Resolve manually, prioritizing latest changes -- Consider using more granular tracker files per section diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index a17ccdf1..24590266 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -15,7 +15,7 @@ This document outlines the concrete steps for migrating Harper v4 documentation ## Part 1: Initial Content Generation (AI-Driven) ### Overview -AI agents work through the migration map, creating PRs for each top-level section. All PRs are opened simultaneously from the same base commit. Each PR adds new files without removing anything from `versioned_docs/`. +AI agents work through the migration map, creating PRs for each top-level section. Each PR adds new files without removing anything from `versioned_docs/`. 
### Agent Instructions @@ -37,6 +37,11 @@ For each section in the migration map, the agent should: [JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md "Will be created in security section") ``` + **IMPORTANT**: After generating all files in the section, replace TODO placeholders with relative paths for internal section links: + - For links within the same section: Use relative paths like `./filename.md` + - For links to other sections not yet migrated: Keep TODO placeholders + - Example: `[CLI Commands](./commands.md)` NOT `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md)` + 7. **Create section-specific link placeholder tracker**: - Store in `migration-context/link-placeholders/` - Named by section: `cli-link-placeholders.md`, `security-link-placeholders.md`, etc. @@ -78,10 +83,49 @@ For each section in the migration map, the agent should: ![Architecture Diagram](TODO:IMAGE) ``` -11. **Create PR** with comprehensive description (template below) +11. **Update the versioned sidebar** at `reference_versioned_sidebars/version-v4-sidebars.json`: + - Add a non-collapsible category for the section + - List all pages in the appropriate order + - Match the pattern from `sidebarsLearn.ts` (non-collapsible with `className: "learn-category-header"`) + - Example: + ```json + { + "type": "category", + "label": "CLI", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "cli/overview", + "label": "Overview" + }, + // ... + ] + } + ``` 12. **Update migration-map.md** status to "In Progress" for that section +13. 
**Git workflow with fixup commits**: + - Create feature branch: `git checkout -b migration/[section-name]` + - Make initial commit with all content files + - Use `git commit --fixup ` for subsequent changes + - This allows easy squashing later while keeping development history clear + - Example: + ```bash + # Initial commit + git add reference_versioned_docs/version-v4/cli/*.md + git commit -m "docs: migrate CLI section to v4 consolidated reference" + + # Subsequent fixes use --fixup + git add reference_versioned_sidebars/version-v4-sidebars.json + git commit --fixup HEAD + ``` + - PRs will be squash-merged to maintain clean history on main branch + +14. **Create PR** with comprehensive description (template below) + ### PR Description Template ```markdown @@ -151,98 +195,121 @@ Updated status for this section to "In Progress" ### Sections to Migrate (In Order of Priority) -Based on migration map, recommend this order: - -**Phase 1A - Simple, Stable Sections (Parallel PRs)** -1. CLI (`reference_versioned_docs/version-v4/cli/`) -2. Content Types (`reference_versioned_docs/version-v4/rest/content-types.md`) -3. Headers (`reference_versioned_docs/version-v4/rest/headers.md`) -4. GraphQL Querying (`reference_versioned_docs/version-v4/graphql-querying/`) -5. Studio (`reference_versioned_docs/version-v4/studio/`) - -**Phase 1B - Medium Complexity (Parallel PRs)** -6. Security (`reference_versioned_docs/version-v4/security/`) -7. Environment Variables (`reference_versioned_docs/version-v4/environment-variables/`) -8. Static Files (`reference_versioned_docs/version-v4/static-files/`) -9. HTTP (`reference_versioned_docs/version-v4/http/`) -10. MQTT (`reference_versioned_docs/version-v4/mqtt/`) -11. Logging (`reference_versioned_docs/version-v4/logging/`) -12. Analytics (`reference_versioned_docs/version-v4/analytics/`) - -**Phase 1C - Complex Sections (Parallel PRs, expect longer review)** -13. REST (`reference_versioned_docs/version-v4/rest/`) -14. 
Replication (`reference_versioned_docs/version-v4/replication/`) -15. Database (`reference_versioned_docs/version-v4/database/`) -16. Resources (`reference_versioned_docs/version-v4/resources/`) -17. Components (`reference_versioned_docs/version-v4/components/`) - -**Phase 1D - Cross-Cutting Sections (After others to minimize placeholders)** -18. Operations API (`reference_versioned_docs/version-v4/operations-api/`) -19. Configuration (`reference_versioned_docs/version-v4/configuration/`) - -**Phase 1E - Legacy Content (Simple moves)** -20. Legacy (`reference_versioned_docs/version-v4/legacy/`) - -### Progress Tracking - -Create GitHub issue to track progress: - -**Title**: "v4 Documentation Migration Progress Tracker" - -**Body**: -```markdown -Tracking migration of v4 documentation to consolidated structure. - -## Phase 1A - Simple Sections -- [ ] #[PR] CLI -- [ ] #[PR] Content Types -- [ ] #[PR] Headers -- [ ] #[PR] GraphQL Querying -- [ ] #[PR] Studio - -## Phase 1B - Medium Complexity -- [ ] #[PR] Security -- [ ] #[PR] Environment Variables -- [ ] #[PR] Static Files -- [ ] #[PR] HTTP -- [ ] #[PR] MQTT -- [ ] #[PR] Logging -- [ ] #[PR] Analytics - -## Phase 1C - Complex Sections -- [ ] #[PR] REST -- [ ] #[PR] Replication -- [ ] #[PR] Database -- [ ] #[PR] Resources -- [ ] #[PR] Components - -## Phase 1D - Cross-Cutting -- [ ] #[PR] Operations API -- [ ] #[PR] Configuration - -## Phase 1E - Legacy -- [ ] #[PR] Legacy Content - -## Part 2 - Link Resolution -- [ ] Links resolved - -## Part 3 - Cross-References -- [ ] Release notes updated -- [ ] Learn guides updated - -## Part 4 - Finalization -- [ ] Sidebars created -- [ ] Old content removed -- [ ] Redirects configured -``` - -After each PR is created, agent adds comment to this issue: -```markdown -Created PR #123 for [Section Name] migration -- Files: X created -- Placeholders: Y links need resolution -- Status: Awaiting human review -``` +Based on migration map and reference plan, recommend this order. 
Each section is generated as a complete unit with all its pages at once: + +**Phase 1A - Simple, Stable Sections** + +1. **CLI** (`reference_versioned_docs/version-v4/cli/`) + - `overview.md` + - `commands.md` + - `operations-api-commands.md` + - `authentication.md` + +2. **GraphQL Querying** (`reference_versioned_docs/version-v4/graphql-querying/`) + - `overview.md` + +3. **Studio** (`reference_versioned_docs/version-v4/studio/`) + - `overview.md` + +4. **Fastify Routes** (`reference_versioned_docs/version-v4/fastify-routes/`) + - `overview.md` + +**Phase 1B - Medium Complexity** + +1. **Environment Variables** (`reference_versioned_docs/version-v4/environment-variables/`) + - `overview.md` + - `configuration.md` + +2. **Static Files** (`reference_versioned_docs/version-v4/static-files/`) + - `overview.md` + - `configuration.md` + +3. **HTTP** (`reference_versioned_docs/version-v4/http/`) + - `overview.md` + - `configuration.md` + - `api.md` + +4. **MQTT** (`reference_versioned_docs/version-v4/mqtt/`) + - `overview.md` + - `configuration.md` + +5. **Logging** (`reference_versioned_docs/version-v4/logging/`) + - `overview.md` + - `configuration.md` + - `api.md` + - `operations.md` + +6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) + - `overview.md` + - `operations.md` + +**Phase 1C - Complex Sections** + +1. **Security** (`reference_versioned_docs/version-v4/security/`) + - `overview.md` + - `basic-authentication.md` + - `jwt-authentication.md` + - `mtls-authentication.md` + - `certificate-management.md` + - `certificate-verification.md` + - `cors.md` + - `ssl.md` + - `users-and-roles.md` + +2. **REST** (`reference_versioned_docs/version-v4/rest/`) + - `overview.md` + - `querying.md` + - `headers.md` + - `content-types.md` + - `websockets.md` + - `server-sent-events.md` + +3. 
**Database** (`reference_versioned_docs/version-v4/database/`) + - `overview.md` + - `schema.md` + - `data-loader.md` + - `storage-algorithm.md` + - `jobs.md` + - `system-tables.md` + - `compaction.md` + - `transaction.md` + +4. **Resources** (`reference_versioned_docs/version-v4/resources/`) + - `overview.md` + - `resource-api.md` + - `global-apis.md` + - `query-optimization.md` + +5. **Components** (`reference_versioned_docs/version-v4/components/`) + - `overview.md` + - `applications.md` + - `extension-api.md` + - `plugin-api.md` + +6. **Replication** (`reference_versioned_docs/version-v4/replication/`) + - `overview.md` + - `clustering.md` + - `sharding.md` + +**Phase 1D - Cross-Cutting Sections** + +1. **Operations API** (`reference_versioned_docs/version-v4/operations-api/`) + - `overview.md` + - `operations.md` + +2. **Configuration** (`reference_versioned_docs/version-v4/configuration/`) + - `overview.md` + - `options.md` + - `operations.md` + +**Phase 1E - Legacy Content** + +1. **Legacy** (`reference_versioned_docs/version-v4/legacy/`) + - `cloud/` (entire folder as-is) + - `custom-functions/` (entire folder as-is) + - `sql/` (entire folder as-is) + +(But ensure we reflect version changes from v4.1 to v4.7 using version annotations) --- @@ -258,7 +325,6 @@ Created PR #123 for [Section Name] migration - Handle image decisions 3. **Human approves and merges PR** 4. **Human updates migration-map.md** status to "Complete" -5. **Human checks off tracking issue** --- @@ -326,7 +392,7 @@ Update other parts of documentation that reference the old structure. **Task**: Update links in learn guides to point to new reference structure. **Agent Instructions**: -1. Scan all files in `learn/` (or wherever learn content lives) +1. Scan all files in `learn/` 2. Find links to old reference paths 3. Map to new paths 4. Create PR with updates @@ -342,34 +408,7 @@ Update other parts of documentation that reference the old structure. 
--- -## Part 5: Sidebar Configuration (AI-Assisted) - -Create Docusaurus sidebar configuration for new structure. - -### Agent Instructions - -1. **Read the reference plan outline** to understand hierarchy -2. **Scan `reference_versioned_docs/version-v4/`** to see what actually exists -3. **Generate sidebar JSON/JS** following Docusaurus conventions: - ```javascript - { - type: 'category', - label: 'CLI', - items: [ - 'cli/overview', - 'cli/commands', - 'cli/operations-api-commands', - 'cli/authentication' - ] - } - ``` -4. **Follow existing sidebar patterns** from current docs -5. **Ensure non-collapsible sections** (as noted in reference plan) -6. **Create PR** with sidebar configuration - ---- - -## Part 6: Redirects Configuration (AI-Assisted) +## Part 5: Redirects Configuration (AI-Assisted) Configure redirects from old paths to new paths. @@ -425,9 +464,10 @@ Focus on: Once everything on `major-version-reorg` branch is complete: 1. Final review of entire branch -2. Squash/organize commits if needed -3. Merge to `main` -4. Deploy +3. Squash/organize commits if needed +4. Format +5. Merge to `main` +6. 
Deploy --- @@ -440,10 +480,7 @@ Once everything on `major-version-reorg` branch is complete: - `v4-docs-reference-plan.md` - Understanding structure and philosophy - `versioned_docs/version-4.X/**/*.md` - Source content - `release_notes/*.md` - Version annotation validation - -**Secondary**: - `v4-docs-research.md` - Manual research notes -- `v4-feature-history-ai-gen.md` - AI-generated feature history (use with caution) ### Agent Constraints diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 2983c57a..aad442a6 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -24,7 +24,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/harper-cli.md` (if exists) - **Merge Required**: Yes - CLI commands added across versions - **Version Annotations**: Track command additions from v4.1 → v4.7 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API commands - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Dev mode (`harperdb dev`, `harperdb run`) @@ -33,7 +33,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: Compare all versions for command evolution - **Version Annotations**: Each command should note its introduction version -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Foreground mode changes @@ -42,13 +42,13 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) - **Version Annotations**: Note v4.3.0 
introduction -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API ### `reference/cli/authentication.md` - **Primary Source**: New content or extract from CLI docs -- **Status**: Not Started +- **Status**: In Progress --- @@ -698,13 +698,6 @@ Files that heavily reference paths that will change: --- -## Migration Workflow Recommendations - -1. **Start with stable, simple sections** (CLI, Content Types, Headers) -2. **Then tackle medium complexity** (Security, Logging, MQTT) -3. **Save complex merges for later** (Configuration, Schema, Components, Clustering) -4. **Move legacy content last** (SQL, Cloud, Custom Functions) - ## Version Annotation Checklist For each file migrated, ensure: @@ -717,9 +710,9 @@ For each file migrated, ensure: ## Release Notes Reference Guide -The `release-notes/v4-tucker/` directory contains 169 release note files covering the entire v4.0 - v4.7 series. Key major releases with significant feature additions: +The `release-notes/v4-tucker/` directory contains 169 release note files covering the entire v4.0 - v4.7 series. Key minor releases with significant feature additions: -### Major Releases +### Minor Releases - **[4.1.0](release-notes/v4-tucker/4.1.0.md)** (Worker threads, iterator-based queries, logging revamp) - **[4.2.0](release-notes/v4-tucker/4.2.0.md)** (Resource API, Component Architecture, REST interface, MQTT/WebSockets/SSE, configurable schemas) diff --git a/v4-feature-history-ai-gen.md b/v4-feature-history-ai-gen.md deleted file mode 100644 index 59e4c1d6..00000000 --- a/v4-feature-history-ai-gen.md +++ /dev/null @@ -1,1692 +0,0 @@ -# HarperDB v4 Feature Mapping & Version History -## Consolidation Reference Document - -**Purpose**: This document maps all features across HarperDB v4.1 through v4.7 to guide the consolidation of versioned documentation into a single unified directory. 
It tracks feature introductions, changes, deprecations, and removals. - -**Created**: 2026-02-05 -**Source Analysis**: versioned_docs (v4.1-v4.7) + release-notes/v4-tucker - ---- - -## Table of Contents -1. [Executive Summary](#executive-summary) -2. [Critical Deprecations & Removals](#critical-deprecations--removals) -3. [Major Feature Additions by Version](#major-feature-additions-by-version) -4. [Feature-by-Feature Version History](#feature-by-feature-version-history) -5. [Operations API Evolution](#operations-api-evolution) -6. [Documentation Structure Evolution](#documentation-structure-evolution) -7. [Consolidation Action Items](#consolidation-action-items) - ---- - -## Executive Summary - -### Major Architectural Changes - -**v4.2.0 (January 2024)** - THE PIVOTAL RELEASE -- Complete documentation restructuring from feature-based to role-based organization -- Introduction of Component Architecture (Applications + Extensions) to replace Custom Functions -- Resource API introduced as unified data access interface -- 11 top-level directories reduced to 5 organized categories - -**v4.3.0 (March 2024)** - "Tucker Release" -- Relationships and foreign key support -- Query optimization and BigInt support - -**v4.4.0 (October 2024)** -- Native replication system (Plexus) -- GraphQL support -- Sharding - -**v4.5.0 (March 2025)** -- Blob storage system -- Password hashing upgrades - -**v4.6.0 (June 2025)** -- Vector indexing (HNSW) -- Data loader component -- New Extension/Plugin API - -**v4.7.0 (October 2025)** -- Component status monitoring -- OCSP certificate support -- Formal deprecation of Custom Functions - -### Key Statistics - -| Version | Total Docs | Major Features Added | Deprecations | -|---------|-----------|---------------------|--------------| -| v4.1 | 92 files | (baseline) | - | -| v4.2 | 101 files | Component Architecture, Resource API | Custom Functions moved to Ops API | -| v4.3 | 101+ files | Relationships, BigInt, CRDT | - | -| v4.4 | 101+
files | Native Replication, GraphQL, Sharding | - | -| v4.5 | 114+ files | Blob Storage, HTTP/2 | - | -| v4.6 | 114+ files | Vector Indexing, Plugin API | - | -| v4.7 | 114+ files | Status Monitoring, OCSP | Custom Functions deprecated | - ---- - -## Critical Deprecations & Removals - -### 1. Custom Functions → Component Architecture - -**Status**: DEPRECATED in v4.7, replaced by Applications + Extensions + Plugins - -#### Version Timeline -- **v4.1**: Featured as top-level `custom-functions/` directory (12 files) -- **v4.2**: Moved to `developers/operations-api/custom-functions.md` (consolidated) -- **v4.2**: Component Architecture introduced as replacement -- **v4.7**: Marked with `:::warning Deprecated` banner - -#### Files Affected (v4.1) -``` -custom-functions/ -├── create-project.md -├── custom-functions-operations.md -├── debugging-custom-function.md -├── define-helpers.md -├── define-routes.md -├── example-projects.md -├── host-static.md -├── requirements-definitions.md -├── templates.md -└── using-npm-git.md -``` - -#### Migration Path -- **Custom Functions** → **Applications** (for HTTP routes and APIs) -- **Custom Functions** → **Extensions** (for background services) -- **Custom Functions** → **Plugins** (for system integrations, v4.6+) - -#### Consolidation Action -- ✅ Retain documentation under "Legacy/Deprecated" section -- ✅ Add prominent deprecation warning -- ✅ Cross-reference to Applications/Extensions/Plugins documentation -- ✅ Include migration guide - ---- - -### 2. 
Deprecated NoSQL Operation Parameters - -**Status**: DEPRECATED in v4.2+, alternatives provided - -#### Changed Parameters -| Deprecated Parameter | Replacement | Version Introduced | File | -|---------------------|-------------|-------------------|------| -| `search_attribute` | `attribute` | v4.2 | nosql-operations.md | -| `search_value` | `value` | v4.2 | nosql-operations.md | -| `search_type` | `comparator` | v4.2 | nosql-operations.md | - -#### Consolidation Action -- ✅ Document both old and new parameters -- ✅ Mark deprecated parameters with version labels -- ✅ Show equivalent examples using both syntaxes -- ✅ Add "Version History" section to nosql-operations documentation - ---- - -### 3. HarperDB Studio → Harper Studio - -**Status**: RENAMED in v4.2+ - -#### Version Timeline -- **v4.1**: `harperdb-studio/` (top-level) -- **v4.2**: `administration/harperdb-studio/` -- **v4.7**: `administration/harper-studio/` - -#### Documentation Changes -**Removed Files** (tied to Custom Functions): -- `manage-functions.md` (replaced by `manage-applications.md`) -- `manage-charts.md` - -**Added Files** (new features): -- `manage-applications.md` (v4.2+) -- `manage-replication.md` (v4.4+) - -#### Consolidation Action -- ✅ Use "Harper Studio" as primary name -- ✅ Add redirect/note mentioning previous "HarperDB Studio" name -- ✅ Merge manage-functions content into historical section - ---- - -## Major Feature Additions by Version - -### v4.2.0 (January 2024) - MAJOR RELEASE - -#### New Architecture -- **Component Architecture** - Applications, Extensions framework - - Files: `developers/applications/` (6 files), `developers/components/` (7 files) - -#### New APIs -- **Resource API** - Unified data access interface - - Files: `developers/resources/` directory -- **REST Interface** - RESTful data access - - Files: `developers/rest-interface.md` - -#### New Features -- **Real-Time Messaging** - MQTT, WebSockets, Server-Sent Events - - Files: `developers/real-time-messaging.md` 
-- **Configurable Database Schemas** - GraphQL schema syntax - - Files: `developers/applications/defining-schemas.md` -- **Clone Node Operation** - Database cloning - - Files: `developers/operations-api/clustering.md` - ---- - -### v4.3.0 (March 2024) - "Tucker Release" - -#### Data Model Enhancements -- **Relationships and Joins** - Foreign keys, many-to-one, one-to-many - - Files: Added to resource API documentation -- **Indexing Nulls** - Null value indexing support -- **BigInt Support** - Large integers (up to 1000 bits) - - Files: `reference/data-types.md` updated -- **CRDT Support** - Conflict-free replicated data types - - Files: Added to resource API documentation - -#### Developer Tools -- **OpenAPI Specification** - `/openapi` endpoint - - Files: `developers/operations-api/` updated -- **CLI Expansion** - Operations API commands from CLI - - Files: `deployments/harperdb-cli.md` updated -- **Query Optimizations** - Improved query planning - - Files: `reference/resources/query-optimization.md` (added in later version) - -#### Operations -- **Database Compaction** - `compact_database` operation - - Files: `developers/operations-api/system-operations.md` - ---- - -### v4.4.0 (October 2024) - -#### Clustering & Distribution -- **Native Replication (Plexus)** - New replication system via WebSocket - - Files: `developers/replication/` directory (NEW) - - `index.md` - - `configuration.md` - - `monitoring.md` - - `troubleshooting.md` -- **Sharding Support** - Data distribution across nodes - - Files: Integrated into replication documentation - -#### Data Model -- **Computed Properties** - Dynamic calculated properties - - Files: `reference/` updated -- **Custom Indexing** - Composite and full-text indexing via computed properties - - Files: `reference/` updated -- **Auto-incrementing Primary Keys** - Automatic numeric key generation - - Files: `reference/` updated - -#### APIs -- **GraphQL Support** - Native GraphQL querying - - Files: `reference/graphql.md` (NEW) 
- -#### Security -- **Dynamic Certificate Management** - Runtime certificate changes - - Files: `developers/operations-api/certificate-management.md` (NEW) - -#### System -- **Status Report on Startup** - Service status display - - Files: Logging documentation updated - ---- - -### v4.5.0 (March 2025) - -#### Storage -- **Blob Storage** - Efficient binary object handling with streaming - - Files: `reference/blob.md` (NEW) -- **Storage Reclamation** - Automatic cleanup when storage low - - Files: `reference/storage-algorithm.md` updated - -#### Security -- **Password Hashing Upgrade** - SHA256 and Argon2id support - - Files: `developers/security/` updated -- **Certificate Revocation** - Revoked certificate list support - - Files: `developers/security/certificate-verification.md` (NEW) - -#### Performance -- **HTTP/2 Support** - HTTP/2 protocol - - Files: `reference/` updated -- **Property Forwarding** - Standard property access syntax - - Files: `reference/` updated - -#### Analytics -- **Resource/Storage Analytics** - Enhanced metrics - - Files: `reference/analytics.md` (NEW) - -#### APIs -- **Table.getRecordCount()** - Record counting API - - Files: Resource API documentation updated - -#### Documentation Enhancements -- **Resources Directory** - New consolidated reference section - - Files: `reference/resources/` (NEW) - - `index.md` - - `instance-binding.md` - - `migration.md` - - `query-optimization.md` - ---- - -### v4.6.0 (June 2025) - -#### AI/ML -- **Vector Indexing (HNSW)** - Hierarchical Navigable Small World algorithm - - Files: Added to operations API documentation, resource API - -#### Component System -- **New Extension API** - Dynamic reloading support - - Files: `reference/components/extensions.md` updated -- **Data Loader** - JSON data loading component - - Files: `developers/applications/data-loader.md` (NEW) -- **Plugin API** - New iteration of extension system - - Files: `reference/components/plugins.md` (NEW) - -#### Operations -- **Logging 
Improvements** - Component-specific logging configuration - - Files: `administration/logging/` updated -- **Resource API Upgrades** - Improved ease of use - - Files: `developers/resource-api/` updated -- **only-if-cached behavior** - Improved caching directives - - Files: `developers/applications/caching.md` updated - ---- - -### v4.7.0 (October 2025) - -#### Monitoring -- **Component Status Monitoring** - Status collection from components - - Files: Operations API updated - -#### Security -- **OCSP Support** - Online Certificate Status Protocol for revocation - - Files: `developers/security/` updated - -#### Integration -- **Analytics/Licensing** - Fabric integration - - Files: `reference/analytics.md` updated, `developers/operations-api/analytics.md` (NEW) - -#### Component System -- **Plugin API Improvements** - Enhanced plugin system - - Files: `reference/components/plugins.md` updated - -#### Major Reorganization -- **Components Moved to Reference** - `developers/components/` → `reference/components/` - - 9 files reorganized - ---- - -## Feature-by-Feature Version History - -### Applications - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Custom Functions** | ✅ Featured | ⚠️ In Ops API | ⚠️ In Ops API | ⚠️ In Ops API | ⚠️ Deprecated | ⚠️ Deprecated | ❌ Deprecated | -| **Component Architecture** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | -| **Applications** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | -| **Extensions** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | -| **Plugins** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | -| **Data Loader** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | -| **Define Routes** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Defining Schemas** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Defining Roles** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | -| **Caching** | ❌ | ✅ New | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | -| **Debugging** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Example 
Projects** | ✅ (CF) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Web Applications** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | - -**Files**: -- v4.1: `custom-functions/` (12 files) -- v4.2-v4.6: `developers/applications/` (6 files) -- v4.7: `developers/applications/` (8 files) - ---- - -### Data Access APIs - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Operations API** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Resource API** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ Enhanced | ✅ Enhanced | ✅ | -| **REST Interface** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| **GraphQL** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **SQL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **NoSQL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **OpenAPI Spec** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | - -**Files**: -- v4.1: `reference/` (7 files) -- v4.2: `developers/operations-api/` (16 files), `developers/resource-api/` (NEW) -- v4.7: `developers/operations-api/` (20 files), `reference/graphql.md` - ---- - -### Data Model Features - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Dynamic Schema** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Configurable Schemas** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Relationships/Joins** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | -| **Foreign Keys** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | -| **Computed Properties** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Custom Indexing** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Auto-increment Keys** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Vector Indexing** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | -| **BigInt Support** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | -| **CRDT Support** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | -| **Null Indexing** | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | -| **Blob Storage** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | - -**Files**: -- v4.1-v4.2: `reference/data-types.md`, 
`reference/dynamic-schema.md` -- v4.3+: Enhanced data type documentation -- v4.4+: `reference/` expanded with computed properties, indexing -- v4.5+: `reference/blob.md` -- v4.6+: Vector indexing in operations API - ---- - -### Clustering & Replication - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Clustering (Legacy)** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **NATS Clustering** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Native Replication (Plexus)** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Sharding** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Clone Node** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Files**: -- v4.1: `clustering/` (13 files, top-level) -- v4.2-v4.7: `reference/clustering/` (13 files, moved) -- v4.4+: `developers/replication/` (NEW - 4 files) -- v4.7: `developers/operations-api/clustering-nats.md` (split from clustering.md) - ---- - -### Security Features - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Authentication** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Authorization/Roles** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **TLS/SSL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **JWT** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **LDAP** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **SAML** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Dynamic Cert Management** | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| **Password Hashing Upgrade** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | -| **Certificate Revocation** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | -| **Certificate Verification** | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | -| **mTLS Auth** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | -| **OCSP Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | - -**Files**: -- v4.1: `security/` (6 files, top-level) -- v4.2-v4.6: `developers/security/` (6 files) -- v4.7: `developers/security/` (8 files) - - Added: `certificate-verification.md`, `mtls-auth.md` - ---- - -### 
Real-Time & Messaging - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **MQTT** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| **WebSockets** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Server-Sent Events** | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Files**: -- v4.2+: `developers/real-time-messaging.md` - ---- - -### Operations API - Specific Operations - -#### System Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `restart` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `get_system_information` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `get_configuration` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `set_configuration` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `compact_database` | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | - -**Files**: `developers/operations-api/system-operations.md` (v4.7+) - ---- - -#### Schema Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `create_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `describe_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_schema` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - ---- - -#### Table Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `create_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `describe_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_table` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `create_attribute` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_attribute` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - ---- - -#### NoSQL Data Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `insert` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `update` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `upsert` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `delete` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| 
`search_by_hash` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `search_by_value` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `search_by_conditions` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Parameter Changes**: -- v4.2+: `search_attribute` → `attribute` (deprecated) -- v4.2+: `search_value` → `value` (deprecated) -- v4.2+: `search_type` → `comparator` (deprecated) - ---- - -#### SQL Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `sql` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - ---- - -#### User & Role Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `add_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `alter_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_user` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `user_info` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `list_users` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `add_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `alter_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_role` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - ---- - -#### Clustering Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `add_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `update_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `remove_node` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `cluster_status` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `cluster_network` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| `clone_node` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | - -**File Split** (v4.7): -- `clustering.md` → `clustering.md` + `clustering-nats.md` - ---- - -#### Custom Functions Operations (DEPRECATED) - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `get_functions` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | -| `set_functions` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | -| `drop_function` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | -| `deploy` 
| ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | -| `package` | ✅ | ✅ | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ Deprecated | - -**Files**: -- v4.1: `custom-functions/custom-functions-operations.md` -- v4.2+: `developers/operations-api/custom-functions.md` -- v4.7: Marked with `:::warning Deprecated` - ---- - -#### Component Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `deploy_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| `drop_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| `package_component` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | -| `get_components` | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Options Added** (tracked by version): -- `deploy_component`: - - v4.2: Initial implementation - - **Need to check**: When was `install_command` option added? - ---- - -#### Certificate Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `add_certificate` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| `list_certificates` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | -| `delete_certificate` | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | ✅ | - -**Files**: `developers/operations-api/certificate-management.md` (v4.4+) - ---- - -#### Analytics Operations - -| Operation | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|-----------|------|------|------|------|------|------|------| -| `get_analytics` | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | ✅ | - -**Files**: `developers/operations-api/analytics.md` (v4.7) - ---- - -### Logging Features - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Standard Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Enhanced | ✅ | -| **Transaction Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Audit Logging** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Component Logging** | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Introduced | ✅ | - -**Files**: -- v4.1: 
`logging.md`, `transaction-logging.md`, `audit-logging.md` (top-level) -- v4.2+: `administration/logging/` (directory) - - `index.md` - - `standard-logging.md` - - `transaction-logging.md` - - `audit-logging.md` - ---- - -### Administration Tools - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **HarperDB Studio** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| **Harper Studio** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ Renamed | -| **Jobs** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Configuration** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Studio Files**: -- v4.1: `harperdb-studio/` (top-level) -- v4.2-v4.6: `administration/harperdb-studio/` (9 files) -- v4.7: `administration/harper-studio/` (9 files) - -**Configuration Files**: -- v4.1: `configuration.md` (top-level) -- v4.2+: `deployments/configuration.md` - ---- - -### SQL Features - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **SQL Operations** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **SQL Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Geospatial Functions** | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (9 files) | ✅ (consolidated) | ✅ (consolidated) | -| **Math Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **String Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Date/Time Functions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Geospatial Consolidation**: -- v4.1-v4.5: `sql-guide/sql-geospatial-functions/` (9 individual files) - - `geoarea.md`, `geocontains.md`, `geoconvert.md`, `geocrosses.md`, `geodifference.md`, `geodistance.md`, `geoequal.md`, `geolength.md`, `geonear.md` -- v4.6+: `reference/sql-guide/sql-geospatial-functions.md` (consolidated) - -**Files**: -- v4.1: `sql-guide/` (13 files, top-level) -- v4.2+: `reference/sql-guide/` (6 files) - ---- - -### Deployment Options - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | 
-|---------|------|------|------|------|------|------|------| -| **Docker** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Linux** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Windows** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **macOS** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **HarperDB Cloud** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Files**: -- v4.1: `install-harperdb/` (top-level), `harperdb-cloud/` (top-level) -- v4.2+: `deployments/install-harperdb/`, `deployments/harperdb-cloud/` - ---- - -### SDKs & Integrations - -| Feature | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|---------|------|------|------|------|------|------|------| -| **Node.js SDK** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Python SDK** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Java Driver** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **ODBC/JDBC** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| **Google Data Studio** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - -**Files**: -- v4.1: `add-ons-and-sdks/` (3 files, top-level) -- v4.2: `developers/components/` includes SDK information - ---- - -## Documentation Structure Evolution - -### v4.1 Organization (Feature-Based) - -``` -version-4.1/ -├── add-ons-and-sdks/ [3 files] -├── clustering/ [13 files] -├── custom-functions/ [12 files] → DEPRECATED -├── getting-started/ [2 files] -├── harperdb-cloud/ [5 files] -├── harperdb-studio/ [9 files] -├── install-harperdb/ [3 files] -├── reference/ [7 files] -├── security/ [6 files] -├── sql-guide/ [13 files] -├── audit-logging.md -├── configuration.md -├── harperdb-cli.md -├── index.md -├── jobs.md -├── logging.md -├── support.md -├── transaction-logging.md -└── upgrade-hdb-instance.md - -Total: 92 files -``` - ---- - -### v4.2-v4.6 Organization (Role-Based) - -``` -version-4.2+/ -├── administration/ -│ ├── harper-studio/ [9 files] (v4.7 renamed) -│ ├── harperdb-studio/ [9 files] (v4.2-v4.6) -│ ├── logging/ [4 files] -│ ├── configuration.md -│ ├── jobs.md -│ └── index.md -├── deployments/ -│ ├── harperdb-cloud/ [5 files] -│ ├── install-harperdb/ [3 files] -│ ├── configuration.md -│ ├── 
harperdb-cli.md -│ └── index.md -├── developers/ -│ ├── applications/ [6-8 files, grew in v4.7] -│ ├── components/ [6-7 files] → moved to reference/ in v4.7 -│ ├── operations-api/ [16-20 files, grew over time] -│ ├── replication/ [4 files] (v4.4+) -│ ├── resource-api/ [multiple files] -│ ├── security/ [6-8 files] -│ ├── getting-started.md -│ ├── harperdb-applications-in-depth.md -│ ├── harperdb-cli.md -│ ├── real-time-messaging.md (v4.2+) -│ ├── rest-interface.md (v4.2+) -│ └── index.md -├── reference/ -│ ├── clustering/ [13 files] (moved from top-level) -│ ├── components/ [9 files] (v4.7+, moved from developers/) -│ ├── resources/ [4 files] (v4.5+) -│ ├── sql-guide/ [6 files] -│ ├── analytics.md (v4.5+) -│ ├── architecture.md -│ ├── blob.md (v4.5+) -│ ├── content-types.md -│ ├── data-types.md -│ ├── dynamic-schema.md -│ ├── globals.md -│ ├── graphql.md (v4.4+) -│ ├── headers.md -│ ├── limits.md -│ ├── roles.md -│ ├── storage-algorithm.md -│ ├── transactions.md -│ └── index.md -└── index.md - -Total (v4.7): 114+ files -``` - ---- - -### Key Structural Changes - -#### v4.1 → v4.2 (Major Reorganization) - -**Moved to Administration**: -- `harperdb-studio/` → `administration/harperdb-studio/` -- `logging.md`, `transaction-logging.md`, `audit-logging.md` → `administration/logging/` -- `jobs.md` → `administration/jobs.md` - -**Moved to Deployments**: -- `install-harperdb/` → `deployments/install-harperdb/` -- `harperdb-cloud/` → `deployments/harperdb-cloud/` -- `configuration.md` → `deployments/configuration.md` -- `harperdb-cli.md` → `deployments/harperdb-cli.md` - -**Moved to Developers**: -- `security/` → `developers/security/` -- `getting-started/` → `developers/getting-started.md` - -**Moved to Reference**: -- `clustering/` → `reference/clustering/` -- `sql-guide/` → `reference/sql-guide/` - -**New Directories Created**: -- `developers/applications/` -- `developers/components/` -- `developers/operations-api/` -- `developers/resource-api/` - -**Deprecated/Removed**: 
-- `custom-functions/` → consolidated to `developers/operations-api/custom-functions.md` -- `add-ons-and-sdks/` → integrated into `developers/components/` - ---- - -#### v4.6 → v4.7 (Components Reorganization) - -**Moved**: -- `developers/components/` → `reference/components/` - -**Reasoning**: Components (Applications, Extensions, Plugins) became core reference documentation rather than developer tools. - ---- - -## Operations API Evolution - -### Operations API File Structure by Version - -#### v4.1 (Implied from reference docs) -- Basic operations documented but not in dedicated directory - -#### v4.2 (16 files) -``` -developers/operations-api/ -├── clustering.md -├── custom-functions.md -├── index.md -├── nosql-operations.md -├── registration.md -├── schema-table-operations.md -├── sql-operations.md -├── user-role-operations.md -└── [8 more files] -``` - -#### v4.7 (20 files) -``` -developers/operations-api/ -├── analytics.md [NEW] -├── certificate-management.md [NEW - v4.4] -├── clustering-nats.md [NEW - split from clustering.md] -├── clustering.md -├── configuration.md [NEW] -├── custom-functions.md [DEPRECATED] -├── index.md -├── nosql-operations.md -├── registration.md -├── schema-table-operations.md -├── sql-operations.md -├── system-operations.md [NEW] -├── user-role-operations.md -└── [7 more files] -``` - -**Notable Additions**: -- v4.3: `compact_database` operation added to system operations -- v4.4: `certificate-management.md` for dynamic certificate operations -- v4.5: Enhanced system operations for storage reclamation -- v4.7: `analytics.md` for Fabric integration, `system-operations.md` consolidated - -**Notable Removals**: -- v4.7: `utilities.md` removed (content integrated elsewhere) - ---- - -## Consolidation Action Items - -### Phase 1: Deprecated Features Documentation - -#### 1.1 Custom Functions (CRITICAL) - -**Goal**: Create comprehensive legacy documentation with clear deprecation warnings and migration paths. 
- -**Actions**: -- [ ] Create new section: `legacy/custom-functions/` -- [ ] Migrate all 12 files from v4.1 `custom-functions/` directory -- [ ] Add deprecation banner to every page: - ```markdown - :::warning Deprecated in v4.7 - Custom Functions have been deprecated as of v4.7.0 (October 2025) and replaced by the [Component Architecture](/developers/applications/). - - **Migration Path**: - - For HTTP routes and APIs → [Applications](/developers/applications/) - - For background services → [Extensions](/reference/components/extensions/) - - For system integrations → [Plugins](/reference/components/plugins/) - - See the [Migration Guide](/legacy/custom-functions/migration-guide/) for detailed instructions. - ::: - ``` -- [ ] Create `legacy/custom-functions/migration-guide.md` with: - - Side-by-side code examples (Custom Functions vs. Applications) - - Feature mapping table - - Common migration scenarios - - Troubleshooting section -- [ ] Add version labels to all Custom Functions operations in Operations API docs -- [ ] Create redirect from old paths to legacy section - -**Files to Migrate**: -1. `create-project.md` -2. `custom-functions-operations.md` -3. `debugging-custom-function.md` -4. `define-helpers.md` -5. `define-routes.md` -6. `example-projects.md` -7. `host-static.md` -8. `requirements-definitions.md` -9. `templates.md` -10. `using-npm-git.md` - ---- - -#### 1.2 Deprecated NoSQL Parameters - -**Goal**: Document parameter evolution with clear version labels. 
- -**Actions**: -- [ ] Add "Parameter History" section to `nosql-operations.md` -- [ ] Create comparison table: - ```markdown - | Deprecated (v4.1) | Current (v4.2+) | Status | - |------------------|-----------------|---------| - | `search_attribute` | `attribute` | Deprecated | - | `search_value` | `value` | Deprecated | - | `search_type` | `comparator` | Deprecated | - ``` -- [ ] Show side-by-side examples with version labels -- [ ] Add deprecation warnings to examples using old parameters -- [ ] Document when support might be fully removed - ---- - -### Phase 2: Renamed Features - -#### 2.1 HarperDB Studio → Harper Studio - -**Goal**: Use current naming while acknowledging historical name. - -**Actions**: -- [ ] Use "Harper Studio" as primary name throughout consolidated docs -- [ ] Add note at top of Harper Studio section: - ```markdown - :::info Historical Note - Harper Studio was previously known as "HarperDB Studio" in versions prior to v4.7. - ::: - ``` -- [ ] Create redirect rule: `/administration/harperdb-studio/*` → `/administration/harper-studio/*` -- [ ] Update all cross-references to use new name -- [ ] Merge v4.1 `manage-functions.md` content into legacy/custom-functions section -- [ ] Document feature evolution: - - v4.1: `manage-functions.md`, `manage-charts.md` - - v4.7: `manage-applications.md`, `manage-replication.md` - ---- - -### Phase 3: Relocated/Reorganized Features - -#### 3.1 Clustering Documentation - -**Goal**: Consolidate under reference section with version history. 
- -**Actions**: -- [ ] Use v4.7 structure: `reference/clustering/` -- [ ] Add version note explaining the relocation: - ```markdown - :::info Location History - - v4.1: Top-level `clustering/` directory - - v4.2+: Moved to `reference/clustering/` - ::: - ``` -- [ ] Ensure all 13 files are present -- [ ] Update cross-references throughout documentation -- [ ] Add links to related features: - - Native Replication (v4.4+): `developers/replication/` - - Clustering Operations: `developers/operations-api/clustering.md` - - NATS Clustering: `developers/operations-api/clustering-nats.md` - ---- - -#### 3.2 Components Documentation - -**Goal**: Place in reference section with clear distinction between Applications, Extensions, and Plugins. - -**Actions**: -- [ ] Use v4.7 structure: `reference/components/` -- [ ] Add version timeline: - ```markdown - ## Version History - - - **v4.2**: Component Architecture introduced - - **v4.6**: Plugin API introduced - - **v4.7**: Components documentation moved to reference section - ``` -- [ ] Create clear sections: - - `applications.md` - HTTP routes, APIs, web applications - - `extensions.md` - Background services, data processing - - `plugins.md` - System integrations (v4.6+) - - `built-in-extensions.md` - Core system extensions - - `configuration.md` - Component configuration -- [ ] Cross-reference to `developers/applications/` for hands-on guides -- [ ] Document evolution from Custom Functions → Components - ---- - -#### 3.3 SQL Geospatial Functions - -**Goal**: Use consolidated file with internal navigation. - -**Actions**: -- [ ] Use v4.6+ structure: Single `reference/sql-guide/sql-geospatial-functions.md` -- [ ] Ensure all 9 functions are documented: - 1. `GEOAREA()` - 2. `GEOCONTAINS()` - 3. `GEOCONVERT()` - 4. `GEOCROSSES()` - 5. `GEODIFFERENCE()` - 6. `GEODISTANCE()` - 7. `GEOEQUAL()` - 8. `GEOLENGTH()` - 9. 
`GEONEAR()` -- [ ] Add table of contents at top for easy navigation -- [ ] Use consistent format for each function: - - Syntax - - Parameters - - Return value - - Examples - - Version availability -- [ ] Note the consolidation at the top: - ```markdown - :::info Documentation Consolidation - Prior to v4.6, each geospatial function was documented in a separate file. This documentation has been consolidated for easier reference. - ::: - ``` - ---- - -### Phase 4: Version-Specific Feature Documentation - -#### 4.1 Operations API Operations - -**Goal**: Document all operations with version introduced and option changes. - -**Actions**: -- [ ] Create "Version History" section for each operation -- [ ] Format example for `deploy_component`: - ```markdown - ## deploy_component - - **Introduced**: v4.2.0 - - ### Syntax - ```json - { - "operation": "deploy_component", - "project": "string", - "package": "string" // optional - } - ``` - - ### Options - - | Option | Type | Required | Introduced | Description | - |--------|------|----------|------------|-------------| - | `project` | string | Yes | v4.2 | Component project path | - | `package` | string | No | v4.2 | Package file path | - | `install_command` | string | No | v4.X | Custom install command | - - ### Version History - - - **v4.2.0**: Operation introduced - - **v4.X.0**: Added `install_command` option [NEED TO VERIFY VERSION] - ``` - -**Operations Requiring Version Tracking**: -1. `compact_database` (v4.3) -2. `clone_node` (v4.2) -3. All certificate operations (v4.4): - - `add_certificate` - - `list_certificates` - - `delete_certificate` -4. Component operations (v4.2): - - `deploy_component` - Track option additions - - `drop_component` - - `package_component` - - `get_components` -5. `get_analytics` (v4.5) - -**RESEARCH NEEDED**: -- [ ] When was `install_command` added to `deploy_component`? 
-- [ ] Review each operation's release notes for option additions - ---- - -#### 4.2 Data Model Features - -**Goal**: Document feature availability by version. - -**Actions**: -- [ ] Create "Feature Availability" tables in relevant sections -- [ ] Example for Relationships documentation: - ```markdown - ## Relationships - - **Introduced**: v4.3.0 (Tucker Release) - - Harper allows you to define relationships between tables using foreign keys. - - ### Feature Matrix - - | Feature | Since | Description | - |---------|-------|-------------| - | Foreign Keys | v4.3 | Link records across tables | - | Many-to-One | v4.3 | Multiple records reference one record | - | One-to-Many | v4.3 | One record references multiple records | - | Cascade Delete | v4.3 | Automatic deletion of related records | - ``` - -**Features Requiring Version Labels**: -1. Relationships & Joins (v4.3) -2. Computed Properties (v4.4) -3. Custom Indexing (v4.4) -4. Auto-increment Keys (v4.4) -5. Vector Indexing (v4.6) -6. BigInt Support (v4.3) -7. CRDT Support (v4.3) -8. Null Indexing (v4.3) -9. Blob Storage (v4.5) - ---- - -#### 4.3 API Features - -**Goal**: Document API evolution and new endpoints. - -**Actions**: -- [ ] Create API comparison table: - ```markdown - ## Data Access APIs - - | API | Introduced | Primary Use Case | - |-----|------------|------------------| - | Operations API | v4.0 | Administrative and data operations | - | Resource API | v4.2 | Unified data access interface | - | REST Interface | v4.2 | RESTful data access | - | GraphQL | v4.4 | Graph-based querying | - | SQL | v4.0 | Relational queries | - | NoSQL | v4.0 | Document operations | - ``` -- [ ] Add "API Evolution" timeline graphic/section -- [ ] Cross-reference between API types with version context - ---- - -#### 4.4 Security Features - -**Goal**: Document security enhancements by version. 
- -**Actions**: -- [ ] Create security features timeline: - ```markdown - ## Security Feature Timeline - - ### Core Authentication & Authorization (v4.0+) - - Username/Password authentication - - Role-based access control (RBAC) - - JWT token authentication - - LDAP integration - - SAML 2.0 support - - ### TLS/SSL Enhancements - - **v4.0**: Basic TLS/SSL support - - **v4.4**: Dynamic certificate management - - **v4.5**: Certificate revocation lists - - **v4.5**: Certificate verification - - **v4.7**: mTLS authentication - - **v4.7**: OCSP support - - ### Password Security - - **v4.0-v4.4**: SHA256 hashing - - **v4.5**: Argon2id support (recommended) - ``` -- [ ] Add security best practices with version recommendations -- [ ] Document migration paths for security upgrades - ---- - -### Phase 5: New Subsystem Documentation - -#### 5.1 Native Replication (v4.4+) - -**Goal**: Comprehensive replication documentation separate from legacy clustering. - -**Actions**: -- [ ] Ensure `developers/replication/` directory is complete -- [ ] Add clear distinction from legacy clustering: - ```markdown - ## Replication vs. Clustering - - Harper offers two approaches to distributed data: - - ### Native Replication (Plexus) - Introduced v4.4 - - WebSocket-based communication - - Automatic conflict resolution - - Sharding support - - Recommended for new deployments - - Documentation: `developers/replication/` - - ### Legacy Clustering (NATS-based) - v4.0+ - - NATS message bus communication - - Manual conflict resolution - - No sharding - - Documentation: `reference/clustering/` - ``` -- [ ] Include migration guide from clustering to replication -- [ ] Cross-reference to: - - `reference/clustering/` (legacy) - - `developers/operations-api/clustering.md` - - `developers/operations-api/clustering-nats.md` - ---- - -#### 5.2 Blob Storage (v4.5+) - -**Goal**: Complete blob storage documentation. 
- -**Actions**: -- [ ] Ensure `reference/blob.md` covers: - - What qualifies as blob data - - Streaming APIs - - Storage locations - - Performance characteristics - - Size limits - - Version: v4.5+ -- [ ] Add examples for: - - Storing images - - Storing videos - - Storing documents - - Streaming large files -- [ ] Cross-reference to: - - Storage algorithm documentation - - Resource API (for blob access) - - Storage analytics - ---- - -#### 5.3 Vector Indexing (v4.6+) - -**Goal**: Comprehensive vector search documentation. - -**Actions**: -- [ ] Ensure vector indexing documentation covers: - - HNSW algorithm explanation - - Use cases (similarity search, embeddings) - - Index creation - - Query syntax - - Performance tuning - - Version: v4.6+ -- [ ] Add examples for: - - Semantic search - - Image similarity - - Recommendation systems -- [ ] Cross-reference to: - - Custom indexing (v4.4) - - Computed properties - - Resource API query syntax - ---- - -### Phase 6: Cross-Version References - -#### 6.1 Version Labels - -**Goal**: Consistent version labeling throughout documentation. - -**Action**: Add version badges to features: -```markdown -## Auto-Incrementing Primary Keys v4.4+ - -Harper supports automatic generation of numeric primary keys. -``` - -**Standard Badge Types**: -- `v4.X+` - Feature introduced -- `Deprecated v4.7` - Deprecated feature -- `Removed v4.X` - Removed feature - ---- - -#### 6.2 Version-Specific Notes - -**Goal**: Call out version-specific behavior. - -**Standard Format**: -```markdown -:::info Version 4.3+ -This feature requires HarperDB v4.3 or later. -::: - -:::warning Versions prior to 4.5 -Password hashing in versions prior to 4.5 uses SHA256. Upgrade to v4.5+ for Argon2id support. -::: - -:::danger Breaking Change in v4.2 -The `search_attribute` parameter was deprecated in v4.2. Use `attribute` instead. 
-::: -``` - ---- - -### Phase 7: Navigation & Organization - -#### 7.1 Consolidated Sidebar - -**Goal**: Create unified sidebar that accommodates all versions. - -**Proposed Structure**: -```javascript -{ - "docsSidebar": [ - { - "type": "doc", - "id": "index", - "label": "Introduction" - }, - { - "type": "category", - "label": "Developers", - "items": [ - { - "type": "category", - "label": "Applications", - "items": [ - "developers/applications/index", - "developers/applications/define-routes", - "developers/applications/defining-schemas", - "developers/applications/defining-roles", - "developers/applications/caching", - "developers/applications/debugging", - "developers/applications/data-loader", - "developers/applications/example-projects", - "developers/applications/web-applications" - ] - }, - { - "type": "category", - "label": "Data Access APIs", - "items": [ - "developers/resource-api/index", - "developers/rest-interface", - "developers/operations-api/index", - "developers/graphql" - ] - }, - { - "type": "category", - "label": "Replication", - "link": { - "type": "doc", - "id": "developers/replication/index" - }, - "items": [ - "developers/replication/configuration", - "developers/replication/monitoring", - "developers/replication/troubleshooting" - ] - }, - { - "type": "category", - "label": "Security", - "items": [ - "developers/security/index", - "developers/security/authentication", - "developers/security/authorization", - "developers/security/tls-ssl", - "developers/security/jwt", - "developers/security/ldap", - "developers/security/saml", - "developers/security/certificate-verification", - "developers/security/mtls-auth" - ] - }, - "developers/real-time-messaging", - "developers/harperdb-applications-in-depth", - "developers/getting-started" - ] - }, - { - "type": "category", - "label": "Administration", - "items": [ - { - "type": "category", - "label": "Harper Studio", - "items": [ - "administration/harper-studio/index", - 
"administration/harper-studio/manage-applications", - "administration/harper-studio/manage-replication", - // ... other studio files - ] - }, - { - "type": "category", - "label": "Logging", - "items": [ - "administration/logging/index", - "administration/logging/standard-logging", - "administration/logging/transaction-logging", - "administration/logging/audit-logging" - ] - }, - "administration/configuration", - "administration/jobs" - ] - }, - { - "type": "category", - "label": "Deployments", - "items": [ - { - "type": "category", - "label": "Install HarperDB", - "items": [ - "deployments/install-harperdb/docker", - "deployments/install-harperdb/linux", - "deployments/install-harperdb/windows" - ] - }, - { - "type": "category", - "label": "HarperDB Cloud", - "items": [ - // cloud files - ] - }, - "deployments/configuration", - "deployments/harperdb-cli" - ] - }, - { - "type": "category", - "label": "Reference", - "link": { - "type": "doc", - "id": "reference/index" - }, - "items": [ - { - "type": "category", - "label": "Components", - "items": [ - "reference/components/index", - "reference/components/applications", - "reference/components/extensions", - "reference/components/plugins", - "reference/components/built-in-extensions", - "reference/components/configuration" - ] - }, - { - "type": "category", - "label": "Clustering", - "items": [ - // all 13 clustering files - ] - }, - { - "type": "category", - "label": "SQL Guide", - "items": [ - "reference/sql-guide/index", - "reference/sql-guide/sql-functions", - "reference/sql-guide/sql-geospatial-functions", - "reference/sql-guide/sql-math-functions", - "reference/sql-guide/sql-string-functions", - "reference/sql-guide/sql-datetime-functions" - ] - }, - { - "type": "category", - "label": "Resources", - "items": [ - "reference/resources/index", - "reference/resources/instance-binding", - "reference/resources/migration", - "reference/resources/query-optimization" - ] - }, - "reference/architecture", - 
"reference/analytics", - "reference/blob", - "reference/content-types", - "reference/data-types", - "reference/dynamic-schema", - "reference/globals", - "reference/graphql", - "reference/headers", - "reference/limits", - "reference/roles", - "reference/storage-algorithm", - "reference/transactions" - ] - }, - { - "type": "category", - "label": "Legacy Features", - "items": [ - { - "type": "category", - "label": "Custom Functions (Deprecated)", - "items": [ - "legacy/custom-functions/index", - "legacy/custom-functions/migration-guide", - "legacy/custom-functions/create-project", - "legacy/custom-functions/custom-functions-operations", - "legacy/custom-functions/debugging-custom-function", - "legacy/custom-functions/define-helpers", - "legacy/custom-functions/define-routes", - "legacy/custom-functions/example-projects", - "legacy/custom-functions/host-static", - "legacy/custom-functions/requirements-definitions", - "legacy/custom-functions/templates", - "legacy/custom-functions/using-npm-git" - ] - } - ] - }, - { - "type": "doc", - "id": "support", - "label": "Support" - } - ] -} -``` - ---- - -#### 7.2 Version Switcher - -**Goal**: Allow users to view version-specific documentation. - -**Options**: -1. **Unified docs with version labels** (Recommended) - - Single documentation tree - - Features labeled with version badges - - Deprecated features in separate "Legacy" section - - Pros: Easier to maintain, comprehensive view - - Cons: More complex individual pages - -2. **Version dropdown for major versions** - - Keep separate docs for v4.1, v4.2, etc. - - Add consolidated "Latest (v4.7)" version - - Pros: Version-accurate documentation - - Cons: Harder to maintain, fragmented - -**Recommendation**: Use unified docs with version labels, keep versioned docs archived for reference. 
- ---- - -### Phase 8: Migration Guides - -#### 8.1 Custom Functions to Components - -**Create**: `legacy/custom-functions/migration-guide.md` - -**Contents**: -- Introduction to Component Architecture -- Feature comparison table -- Step-by-step migration process -- Code examples (before/after) -- Common pitfalls -- FAQ - ---- - -#### 8.2 Legacy Clustering to Native Replication - -**Create**: `developers/replication/migration-from-clustering.md` - -**Contents**: -- Why migrate to Native Replication -- Feature comparison -- Migration process -- Downtime considerations -- Rollback procedures -- FAQ - ---- - -#### 8.3 NoSQL Parameter Updates - -**Create**: `developers/operations-api/nosql-parameter-migration.md` - -**Contents**: -- Parameter mappings -- Code examples -- Automated migration scripts -- Backward compatibility notes - ---- - -### Phase 9: Testing & Validation - -#### 9.1 Link Validation - -**Actions**: -- [ ] Run link checker on all internal links -- [ ] Verify all cross-references point to correct files -- [ ] Test version badge rendering -- [ ] Verify code examples compile/run - ---- - -#### 9.2 Version Accuracy - -**Actions**: -- [ ] Review each version label against release notes -- [ ] Verify operation availability by version -- [ ] Test feature examples on appropriate versions -- [ ] Confirm deprecation timelines - ---- - -#### 9.3 Navigation Testing - -**Actions**: -- [ ] Test sidebar navigation -- [ ] Verify search functionality finds all relevant results -- [ ] Test breadcrumb navigation -- [ ] Verify "Next/Previous" page links - ---- - -## Summary Statistics - -### Documentation Growth - -| Metric | v4.1 | v4.2 | v4.3 | v4.4 | v4.5 | v4.6 | v4.7 | -|--------|------|------|------|------|------|------|------| -| **Total Files** | 92 | 101 | 101+ | 101+ | 114+ | 114+ | 114+ | -| **Top-level Dirs** | 11 | 5 | 5 | 5 | 5 | 5 | 5 | -| **Major Features** | Baseline | +7 | +9 | +8 | +6 | +5 | +4 | -| **Deprecations** | 0 | 1 | 0 | 0 | 0 | 0 | 1 | - ---- 
- -### Feature Categories - -| Category | Features Added | Versions | -|----------|---------------|----------| -| **Architecture** | Component Architecture, Plugins | v4.2, v4.6 | -| **Data Model** | Relationships, Computed Props, Vector Index, Blob | v4.3, v4.4, v4.5, v4.6 | -| **APIs** | Resource API, REST, GraphQL | v4.2, v4.4 | -| **Clustering** | Native Replication, Sharding | v4.4 | -| **Security** | Dynamic Certs, Argon2id, OCSP, mTLS | v4.4, v4.5, v4.7 | -| **Real-time** | MQTT, WebSocket, SSE | v4.2 | -| **Developer Tools** | OpenAPI, CLI expansion, Data Loader | v4.3, v4.6 | - ---- - -## Next Steps - -1. **Review this mapping** with stakeholders for accuracy -2. **Research missing details**: - - When was `install_command` added to `deploy_component`? - - Any other operation option additions? -3. **Prioritize consolidation phases**: - - Phase 1 (Deprecated features) - High priority - - Phase 4 (Version-specific features) - High priority - - Phase 2-3 (Renames/reorganization) - Medium priority - - Phase 5-9 (New systems, navigation) - Lower priority -4. **Begin implementation** starting with deprecated features and version labeling - ---- - -## Document Maintenance - -**Last Updated**: 2026-02-05 -**Next Review**: After consolidation implementation -**Owner**: Documentation Team - -**Change Log**: -- 2026-02-05: Initial comprehensive analysis created From af96a726203b35952583bd3eba6e226c419cb7a5 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 10:40:34 -0700 Subject: [PATCH 07/51] GraphQL Querying Migration (#440) * docs: migrate GraphQL Querying section to v4 consolidated reference Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 * fixup! docs: migrate GraphQL Querying section to v4 consolidated reference * fixup! fixup! 
docs: migrate GraphQL Querying section to v4 consolidated reference * Update reference_versioned_docs/version-v4/graphql-querying/overview.md Co-authored-by: Kris Zyp * remove coming soon sections --------- Co-authored-by: Claude Sonnet 4.5 Co-authored-by: Kris Zyp --- .../graphql-querying-link-placeholders.md | 17 ++ .../version-v4/graphql-querying/overview.md | 248 ++++++++++++++++++ .../version-v4-sidebars.json | 13 + v4-docs-migration-map.md | 2 +- 4 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 migration-context/link-placeholders/graphql-querying-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/graphql-querying/overview.md diff --git a/migration-context/link-placeholders/graphql-querying-link-placeholders.md b/migration-context/link-placeholders/graphql-querying-link-placeholders.md new file mode 100644 index 00000000..553c87d0 --- /dev/null +++ b/migration-context/link-placeholders/graphql-querying-link-placeholders.md @@ -0,0 +1,17 @@ +# Link Placeholders for GraphQL Querying + +## reference_versioned_docs/version-v4/graphql-querying/overview.md + +- Line 17: `[defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md "Schema definition documentation")` + - Context: Introduction explaining GraphQL support for defining schemas + - Target should be: Schema definition documentation in Components/Applications section + +- Line 17: `[Resources](./resources/overview.md)` + - Context: Introduction explaining GraphQL support for querying Resources + - Target should be: Resources overview page + - Note: This is a relative link within section, but Resources section not yet migrated + +- Line 58: `[Resource Query API](./resources/overview.md#query)` + - Context: Discussing query patterns and reference to Resource Query API + - Target should be: Query section of Resources overview + - Note: This is a relative link, but Resources section not yet migrated diff --git 
a/reference_versioned_docs/version-v4/graphql-querying/overview.md b/reference_versioned_docs/version-v4/graphql-querying/overview.md new file mode 100644 index 00000000..2285083e --- /dev/null +++ b/reference_versioned_docs/version-v4/graphql-querying/overview.md @@ -0,0 +1,248 @@ +--- +title: GraphQL Querying +--- + + + + + + + +# GraphQL Querying + +Added in: v4.4.0 (provisional) + +Changed in: v4.5.0 (disabled by default, configuration options) + +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md "Schema definition documentation"), and for querying [Resources](./resources/overview.md). + +Get started by setting `graphql: true` in `config.yaml`. This configuration option was added in v4.5.0 to allow more granular control over the GraphQL endpoint. + +This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. + +> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it does not fully implement either that specification or the [GraphQL](https://spec.graphql.org/) specification. + +Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. 
+ +For example, to request the GraphQL Query: + +```graphql +query GetDogs { + Dog { + id + name + } +} +``` + +The `GET` request would look like: + +```http +GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D +Accept: application/graphql-response+json +``` + +And the `POST` request would look like: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDogs { Dog { id name } }" +} +``` + +> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. + +The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based on the [Resource Query API](./resources/overview.md#query) for more complex queries. + +Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: + +```graphql +query GetDogsAndOwners { + Dog { + id + name + breed + } + + Owner { + id + name + occupation + } +} +``` + +This will return all dogs and owners in the database, and is equivalent to executing two REST queries: + +```http +GET /Dog/?select(id,name,breed) +# and +GET /Owner/?select(id,name,occupation) +``` + +## Request Parameters + +There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables`. + +1. `query` - _Required_ - The string representation of the GraphQL document. + 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. + 1. i.e. 
GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). + 1. If a shorthand, unnamed, or singular named query is provided, it will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. +1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter +1. `variables` - _Optional_ - A map of variable values to be used for the specified query + +## Type Checking + +The Harper GraphQL Querying system is designed to handle GraphQL queries and map them directly to Harper's tables, schemas, fields, and relationships to easily query with GraphQL syntax with minimal configuration, code, and overhead. However, "GraphQL" as a technology has come to encompass an entire model of resolvers and a type checking system, which is outside the scope of using GraphQL as a _query_ language for data retrieval from Harper. Therefore, the querying system generally does **not** type check, and type checking behavior is outside the scope of resolving queries and is only loosely defined in Harper. + +In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. + +For example, the variable `$name: String!` states that `name` should be a non-null, string value. + +- If the request does not contain the `name` variable, an error will be returned +- If the request provides `null` for the `name` variable, an error will be returned +- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. +- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. 
+ - If `null` is provided as the variable value, an error will still be returned. + - If the default value does not match the type specified (i.e. `$name: String! = 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. +- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. + +The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. + +- Objects will be transformed into properly nested attributes +- Strings and Boolean values are passed through as their AST values +- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. +- List and Enums are not supported. + +## Fragments + +The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. + +For example, in the query + +```graphql +query Get { + Dog { + ...DogFields + } +} + +fragment DogFields on Dog { + name + breed +} +``` + +The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). + +You can literally specify anything in the fragment and it will behave the same way: + +```graphql +fragment DogFields on Any { ... } # this is recommended +fragment DogFields on Cat { ... } +fragment DogFields on Animal { ... } +fragment DogFields on LiterallyAnything { ... 
} +``` + +As an actual example, fragments should be used for composition: + +```graphql +query Get { + Dog { + ...sharedFields + breed + } + Owner { + ...sharedFields + occupation + } +} + +fragment sharedFields on Any { + id + name +} +``` + +## Short Form Querying + +Any attribute can be used as an argument for a query. In this short form, multiple arguments are treated as multiple equivalency conditions with the default `and` operation. + +For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. + +```graphql +query GetDog($id: ID!) { + Dog(id: $id) { + name + breed + owner { + name + } + } +} +``` + +And as a properly formed request: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}}", + "variables": { + "id": "0" + } +} +``` + +The REST equivalent would be: + +```http +GET /Dog/?id==0&select(name,breed,owner{name}) +# or +GET /Dog/0?select(name,breed,owner{name}) +``` + +Short form queries can handle nested attributes as well. + +For example, return all dogs who have an owner with the name `"John"` + +```graphql +query GetDog { + Dog(owner: { name: "John" }) { + name + breed + owner { + name + } + } +} +``` + +Would be equivalent to + +```http +GET /Dog/?owner.name==John&select(name,breed,owner{name}) +``` + +And finally, we can put all of these together to create semi-complex, equality-based queries! + +The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. + +```graphql +query GetDog($dogName: String!, $ownerName: String!) 
{ + Dog(name: $dogName, owner: { name: $ownerName }) { + name + breed + owner { + name + } + } +} +``` diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 6229878b..7996c06f 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -32,6 +32,19 @@ "label": "Authentication" } ] + }, + { + "type": "category", + "label": "GraphQL Querying", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "graphql-querying/overview", + "label": "Overview" + } + ] } ] } diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index aad442a6..6756bc4a 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -589,7 +589,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Added: v4.4.0 (experimental) - Disabled by default: v4.5.0 -- **Status**: Not Started +- **Status**: In Progress - **Notes**: Mark as experimental/incomplete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - OpenAPI specification endpoint From 2c599700eb40ab9ea9c91587e270026018515fc2 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 12:47:34 -0700 Subject: [PATCH 08/51] Studio Migration (#441) * docs: migrate Studio section to v4 consolidated reference Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 * fixup! docs: migrate Studio section to v4 consolidated reference * fixup! docs: migrate Studio section to v4 consolidated reference * fixup! docs: migrate Studio section to v4 consolidated reference * fixup! docs: migrate Studio section to v4 consolidated reference * fixup! fixup! 
docs: migrate Studio section to v4 consolidated reference * docs: update plans to clarify Studio vs Harper Cloud Studio distinction * manual edits * edit credentials/authentication info * move additional cloud docs over per discussion * simplify legacy cloud * remove reference to cloud in studio docs. * update link placeholders doc --------- Co-authored-by: Claude Sonnet 4.5 --- .../studio-link-placeholders.md | 7 ++++ .../version-v4/legacy/cloud.md | 11 ++++++ .../version-v4/studio/overview.md | 37 +++++++++++++++++++ .../version-v4-sidebars.json | 26 +++++++++++++ src/css/custom.css | 6 +-- v4-docs-implementation-plan.md | 4 +- v4-docs-migration-map.md | 29 ++++++++++----- 7 files changed, 105 insertions(+), 15 deletions(-) create mode 100644 migration-context/link-placeholders/studio-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/legacy/cloud.md create mode 100644 reference_versioned_docs/version-v4/studio/overview.md diff --git a/migration-context/link-placeholders/studio-link-placeholders.md b/migration-context/link-placeholders/studio-link-placeholders.md new file mode 100644 index 00000000..d20e68da --- /dev/null +++ b/migration-context/link-placeholders/studio-link-placeholders.md @@ -0,0 +1,7 @@ +# Link Placeholders for Studio + +## reference_versioned_docs/version-v4/studio/overview.md + +- Line 20: `[configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio "Configuration options")` + - Context: Explaining how to enable local Studio via configuration + - Target should be: Configuration options page localStudio section diff --git a/reference_versioned_docs/version-v4/legacy/cloud.md b/reference_versioned_docs/version-v4/legacy/cloud.md new file mode 100644 index 00000000..6535e8c5 --- /dev/null +++ b/reference_versioned_docs/version-v4/legacy/cloud.md @@ -0,0 +1,11 @@ +--- +title: Harper Cloud +--- + +Harper Cloud (also sometimes referred to as Harper Studio) was Harper's original PaaS 
offering. +It has been fully replaced by [Harper Fabric](https://fabric.harper.fast). +All users are encouraged to migrate or get started using Harper Fabric immediately. + +[Local Studio](../studio/overview.md) is still an available feature, and now uses the same client as Harper Fabric. + +Reach out to [support@harperdb.io](mailto:support@harperdb.io) or join our community [Discord](https://harper.fast/discord) if you have questions. diff --git a/reference_versioned_docs/version-v4/studio/overview.md b/reference_versioned_docs/version-v4/studio/overview.md new file mode 100644 index 00000000..41e2d46d --- /dev/null +++ b/reference_versioned_docs/version-v4/studio/overview.md @@ -0,0 +1,37 @@ +--- +title: Local Studio +--- + + + + +- Added in: v4.1.0 +- Changed in: v4.3.0 (Upgrade to match Cloud client) +- Changed in: v4.7.0 (Upgraded to match Fabric client) + +Harper Local Studio is a web-based GUI that enables you to administer, navigate, and monitor your Harper instance through a simple, user-friendly interface without requiring knowledge of the underlying Harper APIs. + +It is automatically bundled with all Harper instances and is enabled by default on the Operations API port. + +If you're looking for the platform as a service interface, go to [Harper Fabric](https://fabric.harper.fast) instead. + +## Configuration + +To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio "Configuration options"): + +```yaml +localStudio: + enabled: true +``` + +The local studio is provided by the [Operations API](TODO:reference_versioned_docs/version-v4/operations/configuration.md) and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. 
+ +## Accessing Local Studio + +The local Studio can be accessed through your browser at: + +``` +http://localhost:9925 +``` + +All database interactions from the local Studio are made directly from your browser to your Harper instance. Authentication is maintained via session cookies. diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 7996c06f..d37dfbae 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -45,6 +45,32 @@ "label": "Overview" } ] + }, + { + "type": "category", + "label": "Studio", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "studio/overview", + "label": "Overview" + } + ] + }, + { + "type": "category", + "label": "Legacy", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "legacy/cloud", + "label": "Harper Cloud" + } + ] } ] } diff --git a/src/css/custom.css b/src/css/custom.css index 970b2a67..22610ac3 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -38,11 +38,11 @@ code, } /* Custom styles for Learn sidebar category headers */ -.learn-category-header .menu__list-item-collapsible:hover { +.learn-category-header > .menu__list-item-collapsible:hover { background: none; } -.learn-category-header .menu__list-item-collapsible .menu__link { +.learn-category-header > .menu__list-item-collapsible > .menu__link { color: #6c757d !important; /* Grey text color */ border-radius: 0; border-bottom: 1px solid #dee2e6; /* Underline */ @@ -54,7 +54,7 @@ code, } /* Dark mode styling for category headers */ -[data-theme='dark'] .learn-category-header .menu__list-item-collapsible .menu__link { +[data-theme='dark'] .learn-category-header > .menu__list-item-collapsible > .menu__link { color: #adb5bd !important; /* Lighter grey for dark mode */ border-bottom-color: #495057; /* Darker 
underline for dark mode */ } diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 24590266..162f394e 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -209,7 +209,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `overview.md` 3. **Studio** (`reference_versioned_docs/version-v4/studio/`) - - `overview.md` + - `overview.md` - Simple page covering local Studio UI configuration and access 4. **Fastify Routes** (`reference_versioned_docs/version-v4/fastify-routes/`) - `overview.md` @@ -305,7 +305,7 @@ Based on migration map and reference plan, recommend this order. Each section is **Phase 1E - Legacy Content** 1. **Legacy** (`reference_versioned_docs/version-v4/legacy/`) - - `cloud/` (entire folder as-is) + - `cloud/` - Harper Cloud landing page to direct users to Fabric instead - `custom-functions/` (entire folder as-is) - `sql/` (entire folder as-is) diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 6756bc4a..2d9cd10c 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -601,13 +601,16 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Studio Section ### `reference/studio/overview.md` -- **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/index.md` -- **Additional Sources**: All harper-studio/*.md files -- **Merge Required**: Maybe - consolidate or keep nested? 
-- **Status**: Not Started -- **Notes**: May want to keep as nested folder or consolidate into single page +- **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` (localStudio configuration) +- **Status**: In Progress +- **Notes**: Simple overview page focusing on: + - How to configure/enable local Studio (localStudio.enabled in config) + - How to access local Studio (http://localhost:9926) + - General description of Studio's purpose (UI for Harper instance) + - Link to hosted Studio at studio.harperdb.io - **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Local studio upgrade to match online version + - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Local studio upgraded to match online version + - [4.7.0](release-notes/v4-tucker/4.7.0.md) - Studio client updated --- @@ -624,9 +627,10 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Legacy Section ### `reference/legacy/cloud/` -- **Primary Source**: `versioned_docs/version-4.7/deployments/harper-cloud/*` -- **Status**: N/A -- **Notes**: Move entire folder as-is, add deprecation notice +- **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/*` +- **Additional Sources**: `versioned_docs/version-4.7/deployments/harper-cloud/*` +- **Status**: In Progress +- **Notes**: The primary and additional sources are to be completely removed and this section is to act as a basic landing page to direct users to Fabric instead. 
### `reference/legacy/custom-functions/` - **Primary Source**: `versioned_docs/version-4.1/custom-functions/*` @@ -673,9 +677,10 @@ These files require careful merging from multiple sources: - Migration path complex - Significant API changes in v4.4 -### Files Being Removed +### Files Being Removed/Ignored These exist in current docs but won't exist in new structure: +**To be moved to Learn guides:** - `versioned_docs/version-4.7/administration/administration.md` - Generic admin intro - `versioned_docs/version-4.7/administration/cloning.md` - Move to Learn guide - `versioned_docs/version-4.7/developers/applications/debugging.md` - Move to Learn guide @@ -685,8 +690,12 @@ These exist in current docs but won't exist in new structure: - `versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md` - Move to Learn guide - `versioned_docs/version-4.7/deployments/install-harper/*` - Move to Learn guides - `versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md` - Move to Learn guide + +**To be ignored (obsolete content):** - `versioned_docs/version-4.7/reference/index.md` - Generic intro page - `versioned_docs/version-4.7/reference/limits.md` - Fold into database/overview or schema +- `versioned_docs/version-4.7/administration/harper-studio/` - direct users to fabric +- `versioned_docs/version-4.7/deployments/harper-cloud/` - direct users to fabric ### Cross-References to Update Files that heavily reference paths that will change: From c6c99e5f6a94901bae80bdc98524bce7fd82dbce Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 13:08:06 -0700 Subject: [PATCH 09/51] Fastify Routes Migration (#442) * docs: migrate Fastify Routes section to v4 consolidated reference Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 * fixup! 
docs: migrate Fastify Routes section to v4 consolidated reference * manual tweaks --------- Co-authored-by: Claude Sonnet 4.5 --- .../fastify-routes-link-placeholders.md | 11 ++ .../version-v4/fastify-routes/overview.md | 126 ++++++++++++++++++ .../version-v4-sidebars.json | 13 ++ v4-docs-migration-map.md | 2 +- 4 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 migration-context/link-placeholders/fastify-routes-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/fastify-routes/overview.md diff --git a/migration-context/link-placeholders/fastify-routes-link-placeholders.md b/migration-context/link-placeholders/fastify-routes-link-placeholders.md new file mode 100644 index 00000000..1abcd32f --- /dev/null +++ b/migration-context/link-placeholders/fastify-routes-link-placeholders.md @@ -0,0 +1,11 @@ +# Link Placeholders for Fastify Routes + +## reference_versioned_docs/version-v4/fastify-routes/overview.md + +- Line 10: `[Custom Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md "Resources documentation")` + - Context: Explaining that Fastify routes are discouraged in favor of modern routing with Custom Resources + - Target should be: Resources overview page + +- Line 9: `[REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST interface documentation")` + - Context: Recommending REST interface for better performance and standards compliance + - Target should be: REST overview page diff --git a/reference_versioned_docs/version-v4/fastify-routes/overview.md b/reference_versioned_docs/version-v4/fastify-routes/overview.md new file mode 100644 index 00000000..71734645 --- /dev/null +++ b/reference_versioned_docs/version-v4/fastify-routes/overview.md @@ -0,0 +1,126 @@ +--- +title: Define Fastify Routes +--- + + + +# Define Fastify Routes + +:::note +Fastify routes are discouraged in favor of modern routing with [Custom 
Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md "Resources documentation"), but remain a supported feature for backwards compatibility and specific use cases. +::: + +Harper provides a built-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. While we generally recommend building your endpoints/APIs with Harper's [REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST interface documentation") for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. + +The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): + +```yaml +fastifyRoutes: + files: routes/*.js # specify the location of route definition modules +``` + +By default, route URLs are configured to be: + +``` +:// +``` + +However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. + +- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. + +In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. 
+ +```javascript +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/', + method: 'POST', + preValidation: hdbCore.preValidation, + handler: hdbCore.request, + }); +}; +``` + +## Custom Handlers + +For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. + +**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** + +```javascript +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/:id', + method: 'GET', + handler: (request) => { + request.body= { + operation: 'sql', + sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` + }; + + const result = await hdbCore.requestWithoutAuthentication(request); + return result.filter((dog) => dog.age > 4); + } + }); +} +``` + +## Custom preValidation Hooks + +The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. 
+ +Below is an example of a route that uses a custom validation hook: + +```javascript +import customValidation from '../helpers/customValidation'; + +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/:id', + method: 'GET', + preValidation: (request) => customValidation(request, logger), + handler: (request) => { + request.body = { + operation: 'sql', + sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, + }; + + return hdbCore.requestWithoutAuthentication(request); + }, + }); +}; +``` + +Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](#helper-methods). + +## Helper Methods + +When declaring routes, you are given access to 2 helper methods: hdbCore and logger. + +### hdbCore + +hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, by passing the standard Operations API. + +#### preValidation + +This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. + +#### request + +This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. + +#### requestWithoutAuthentication + +Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. 
For security purposes, you should always take the following precautions when using this method: +- Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. + +### logger + +This helper allows you to write directly to the log file, hdb.log. It's useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. + +- logger.trace('Starting the handler for /dogs') +- logger.debug('This should only fire once') +- logger.warn('This should never ever fire') +- logger.error('This did not go well') +- logger.fatal('This did not go very well at all') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index d37dfbae..7b60e0b1 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -59,6 +59,19 @@ } ] }, + { + "type": "category", + "label": "Fastify Routes", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "fastify-routes/overview", + "label": "Overview" + } + ] + }, { "type": "category", "label": "Legacy", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 2d9cd10c..df51b8ca 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -619,7 +619,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ### `reference/fastify-routes/overview.md` - **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` - **Additional Sources**: Current `reference/define-routes.md` -- **Status**: Not Started +- **Status**: In Progress 
- **Notes**: Discouraged in favor of modern routing with components, but still a supported feature. --- From d3af5dd7f6c3a135a4b05a02149ffcca162b83b2 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 13:08:56 -0700 Subject: [PATCH 10/51] format --- docusaurus.config.ts | 14 +- .../cli-link-placeholders.md | 12 + reference/index.md | 3 +- .../version-v4/cli/authentication.md | 14 +- .../version-v4/cli/commands.md | 14 +- .../version-v4/cli/operations-api-commands.md | 178 ++++---- .../version-v4/cli/overview.md | 36 +- .../version-v4/fastify-routes/overview.md | 5 +- .../version-v4/graphql-querying/overview.md | 2 +- reference_versioned_docs/version-v4/index.md | 3 +- .../version-v4/studio/overview.md | 4 +- scripts/analyze-pageview-data.mjs | 432 +++++++++--------- sidebarsReference.ts | 5 +- v4-docs-implementation-plan.md | 87 +++- v4-docs-migration-map.md | 85 +++- v4-docs-project-brief.md | 64 ++- v4-docs-reference-plan.md | 11 +- v4-docs-research.md | 54 +-- 18 files changed, 630 insertions(+), 393 deletions(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index eb37f5e0..bd504c5a 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -62,8 +62,8 @@ const config: Config = { '@docusaurus/plugin-content-pages', { id: 'default', - path: 'src/pages' - } + path: 'src/pages', + }, ], // Learn documentation [ @@ -98,8 +98,8 @@ const config: Config = { lastVersion: 'current', includeCurrentVersion: false, versions: { - 'current': { label: 'v5', path: 'v5' }, - 'v4': { label: 'v4', path: 'v4', banner: 'none' } + current: { label: 'v5', path: 'v5' }, + v4: { label: 'v4', path: 'v4', banner: 'none' }, }, // Converts npm commands in markdown code blocks to show npm/yarn/pnpm tabs remarkPlugins: [[require('@docusaurus/remark-plugin-npm2yarn'), { sync: true }]], @@ -358,12 +358,12 @@ const config: Config = { }, { label: 'Release Notes', - to: '/release-notes/v4-tucker' + to: '/release-notes/v4-tucker', }, { label: 'Fabric', - to: 
'/fabric' - } + to: '/fabric', + }, ], }, { diff --git a/migration-context/link-placeholders/cli-link-placeholders.md b/migration-context/link-placeholders/cli-link-placeholders.md index 60f8485e..49653d73 100644 --- a/migration-context/link-placeholders/cli-link-placeholders.md +++ b/migration-context/link-placeholders/cli-link-placeholders.md @@ -226,13 +226,16 @@ This document tracks all link placeholders in the CLI section that need to be re ## Summary ### Resolved Links (Within CLI Section) + - 12 links to pages within the CLI section (all resolved in this migration) - All internal CLI section links now use relative paths (e.g., `./overview.md`) ### Pending Links (Cross-Section References) + These will be resolved in future section migrations: **Operations API Section** (~82 links): + - `reference_versioned_docs/version-v4/operations-api/overview.md` (5 occurrences) - `reference_versioned_docs/version-v4/operations-api/operations.md` (2 occurrences) - Operations table category pages (72 links): @@ -251,32 +254,41 @@ These will be resolved in future section migrations: - `../operations-api/status.md` **Configuration Section** (5 links): + - `reference_versioned_docs/version-v4/configuration/overview.md` **Database Section** (3 links): + - `reference_versioned_docs/version-v4/database/compaction.md` (2 occurrences) - `reference_versioned_docs/version-v4/database/overview.md` (1 occurrence) **Security Section** (4 links): + - `reference_versioned_docs/version-v4/security/overview.md` (2 occurrences) - `reference_versioned_docs/version-v4/security/users-and-roles.md` (2 occurrences) **Logging Section** (1 link): + - `reference_versioned_docs/version-v4/logging/overview.md` **Components Section** (1 link): + - `reference_versioned_docs/version-v4/components/overview.md` **REST Section** (1 link): + - `reference_versioned_docs/version-v4/rest/overview.md` **GraphQL Querying Section** (1 link): + - `reference_versioned_docs/version-v4/graphql-querying/overview.md` 
**Applications Section** (1 link): + - `reference_versioned_docs/version-v4/applications/overview.md` **Learn Guides** (1 link): + - Deploying Harper Applications guide (external learn link) **Total Pending Links**: ~96 diff --git a/reference/index.md b/reference/index.md index c6e04335..976c77b0 100644 --- a/reference/index.md +++ b/reference/index.md @@ -1,2 +1,3 @@ # future v5 docs -replace with final product of /reference_versioned_docs/version-v4/ \ No newline at end of file + +replace with final product of /reference_versioned_docs/version-v4/ diff --git a/reference_versioned_docs/version-v4/cli/authentication.md b/reference_versioned_docs/version-v4/cli/authentication.md index 1ff6f327..dc22a942 100644 --- a/reference_versioned_docs/version-v4/cli/authentication.md +++ b/reference_versioned_docs/version-v4/cli/authentication.md @@ -51,6 +51,7 @@ harper get_components target=https://remote-instance.example.com:9925 ``` **Benefits**: + - Credentials not visible in command history - More secure for scripting - Can be set once per session @@ -83,10 +84,12 @@ harper describe_database \ ``` **Parameters**: + - `username=` - Harper admin username - `password=` - Harper admin password **Cautions**: + - Credentials visible in command history - Less secure for production environments - Exposed in process listings @@ -164,7 +167,7 @@ harper deploy target=https://prod.example.com:9925 ### 4. Use Least Privilege -Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles documentation") for more information. +Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md 'Users and roles documentation') for more information. ### 5. 
Rotate Credentials @@ -172,7 +175,7 @@ Regularly rotate credentials, especially for automated systems and CI/CD pipelin ### 6. Audit Access -Monitor and audit CLI operations, especially for production environments. See [Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md "Logging documentation") for more information on logging. +Monitor and audit CLI operations, especially for production environments. See [Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md 'Logging documentation') for more information on logging. ## Troubleshooting @@ -190,6 +193,7 @@ If you receive authentication errors: - Verify HTTPS/HTTP protocol 3. **Check network connectivity**: + ```bash curl https://server.com:9925 ``` @@ -203,6 +207,7 @@ If you receive authentication errors: If environment variables aren't working: 1. **Verify variables are set**: + ```bash echo $CLI_TARGET_USERNAME echo $CLI_TARGET_PASSWORD @@ -210,6 +215,7 @@ If environment variables aren't working: 2. **Export variables**: Ensure you used `export`, not just assignment: + ```bash # Wrong - variable only available in current shell CLI_TARGET_USERNAME=admin @@ -227,5 +233,5 @@ If environment variables aren't working: - [CLI Overview](./overview.md) - General CLI information - [CLI Commands](./commands.md) - Core CLI commands - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md "Security overview") - Harper security features -- [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles") - User management +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview') - Harper security features +- [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md 'Users and roles') - User management diff --git 
a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md index 2d46a5ac..3f64de6b 100644 --- a/reference_versioned_docs/version-v4/cli/commands.md +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -58,7 +58,7 @@ harper \ --ROOTPATH='/hdb' ``` -**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview") for a full list of configuration parameters. +**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview') for a full list of configuration parameters. :::info For more information on installation, see [Getting Started / Install and Connect Harper](/learn/getting-started/install-and-connect-harper). @@ -87,6 +87,7 @@ harper dev /path/to/app ``` **Features**: + - Pushes logs to standard streams automatically - Uses a single thread for simpler debugging - Auto-restart on file changes @@ -152,6 +153,7 @@ harper version ``` **Example Output**: + ``` 4.7.0 ``` @@ -167,6 +169,7 @@ harper status ``` Shows: + - Harper process status - Clustering network status - Replication statuses @@ -208,6 +211,7 @@ harper copy-db ``` **Parameters**: + - `` - Name of the source database - `` - Full path to the target database file @@ -220,12 +224,13 @@ harper copy-db data /home/user/hdb/database/copy.mdb This copies the default `data` database to a new location with compaction applied. **Use Cases**: + - Database optimization - Eliminating fragmentation - Creating compacted backups - Reclaiming free space -See also: [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Database compaction reference") for more information. 
+See also: [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md 'Database compaction reference') for more information. #### How Backups Work @@ -244,6 +249,7 @@ Database files are stored in the `hdb/database` directory. As long as the snapsh - **Safe Copying**: Standard file copying is only reliable for database files that are **not in use** **Recommended Backup Tools**: + - LVM snapshots - ZFS snapshots - BTRFS snapshots @@ -259,5 +265,5 @@ The CLI supports executing commands on remote Harper instances. For details, see - [CLI Overview](./overview.md) - General CLI information - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI - [CLI Authentication](./authentication.md) - Authentication mechanisms -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration") - Configuration parameters for installation -- [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Compaction") - More on database compaction +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration') - Configuration parameters for installation +- [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md 'Compaction') - More on database compaction diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index faf217d2..b3f116da 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -9,7 +9,7 @@ title: Operations API Commands Added in: v4.3.0 (confirmed via release notes) -The Harper CLI supports executing operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") directly from the command line. 
This enables powerful automation and scripting capabilities. +The Harper CLI supports executing operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. This enables powerful automation and scripting capabilities. ## General Syntax @@ -18,6 +18,7 @@ harper = ``` **Output Format**: + - Default: YAML - JSON: Pass `json=true` as a parameter @@ -33,80 +34,80 @@ The following operations are available through the CLI. Operations that require This is just a brief overview of all operations available as CLI commands. Review the respective operation documentation for more information on available arguments and expected behavior. Keep in mind that all operations options are converted to CLI arguments in the same way (using `snake_case`). ::: -| Operation | Description | Category | Available Since | -|-----------|-------------|----------|-----------------| -| `describe_table` | Describe table structure and metadata | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `describe_all` | Describe all databases and tables | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `describe_database` | Describe database structure | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_database` | Create a new database | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_database` | Delete a database | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_table` | Create a new table | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_table` | Delete a table | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_attribute` | Create a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_attribute` | Delete a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `search_by_id` | Search records by ID | 
[Data](TODO:../operations-api/data.md) | v4.3.0 | -| `search_by_value` | Search records by attribute value | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `insert` | Insert new records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `update` | Update existing records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `upsert` | Insert or update records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `delete` | Delete records | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `sql` | Execute SQL queries | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `csv_file_load` | Load data from CSV file | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `csv_url_load` | Load data from CSV URL | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `list_users` | List all users | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `add_user` | Create a new user | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `alter_user` | Modify user properties | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `drop_user` | Delete a user | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `list_roles` | List all roles | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `drop_role` | Delete a role | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `create_csr` | Create certificate signing request | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `sign_certificate` | Sign a certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `list_certificates` | List SSL/TLS certificates | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `add_certificate` | Add SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `remove_certificate` | Remove SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `add_ssh_key` | Add SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `get_ssh_key` 
| Get SSH key | [Security](TODO:../operations-api/security.md) | v4.7.2 | -| `update_ssh_key` | Update SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `delete_ssh_key` | Delete SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `list_ssh_keys` | List all SSH keys | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `set_ssh_known_hosts` | Set SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `get_ssh_known_hosts` | Get SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `cluster_get_routes` | Get cluster routing information | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `cluster_network` | Get cluster network status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `cluster_status` | Get cluster status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `remove_node` | Remove node from cluster | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `add_component` | Add a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `deploy_component` | Deploy a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `deploy` (alias) | Alias for `deploy_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `package_component` | Package a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `package` (alias) | Alias for `package_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `drop_component` | Remove a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `get_components` | List all components | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `get_component_file` | Get component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `set_component_file` | Set component file contents | 
[Components](TODO:../operations-api/components.md) | v4.3.0 | -| `install_node_modules` | Install Node.js dependencies | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `set_configuration` | Update configuration settings | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | -| `get_configuration` | Get current configuration | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | -| `create_authentication_tokens` | Create authentication tokens | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | -| `refresh_operation_token` | Refresh operation token | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | -| `restart_service` | Restart Harper service | [System](TODO:../operations-api/system.md) | v4.3.0 | -| `restart` | Restart Harper instance | [System](TODO:../operations-api/system.md) | v4.3.0 | -| `system_information` | Get system information | [System](TODO:../operations-api/system.md) | v4.3.0 | -| `registration_info` | Get registration information | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `get_fingerprint` | Get instance fingerprint | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `set_license` | Set license key | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `get_usage_licenses` | Get usage and license info | [Licensing](TODO:../operations-api/licensing.md) | v4.7.3 | -| `get_job` | Get job status | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | -| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | -| `read_log` | Read application logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `read_transaction_log` | Read transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `read_audit_log` | Read audit logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `delete_transaction_logs_before` | Delete old transaction logs | 
[Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `purge_stream` | Purge streaming data | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | -| `delete_records_before` | Delete old records | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | -| `get_status` | Get custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | -| `set_status` | Set custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | -| `clear_status` | Clear custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| Operation | Description | Category | Available Since | +| -------------------------------- | ------------------------------------- | ---------------------------------------------------------- | --------------- | +| `describe_table` | Describe table structure and metadata | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `describe_all` | Describe all databases and tables | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `describe_database` | Describe database structure | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_database` | Create a new database | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_database` | Delete a database | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_table` | Create a new table | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_table` | Delete a table | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `create_attribute` | Create a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `drop_attribute` | Delete a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | +| `search_by_id` | Search records by ID | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `search_by_value` | Search records by attribute value | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `insert` | Insert new 
records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `update` | Update existing records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `upsert` | Insert or update records | [Data](TODO:../operations-api/data.md) | v4.4.9 | +| `delete` | Delete records | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `sql` | Execute SQL queries | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `csv_file_load` | Load data from CSV file | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `csv_url_load` | Load data from CSV URL | [Data](TODO:../operations-api/data.md) | v4.3.0 | +| `list_users` | List all users | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `add_user` | Create a new user | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `alter_user` | Modify user properties | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `drop_user` | Delete a user | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `list_roles` | List all roles | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `drop_role` | Delete a role | [Security](TODO:../operations-api/security.md) | v4.3.0 | +| `create_csr` | Create certificate signing request | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `sign_certificate` | Sign a certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `list_certificates` | List SSL/TLS certificates | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `add_certificate` | Add SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `remove_certificate` | Remove SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `add_ssh_key` | Add SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `get_ssh_key` | Get SSH key | [Security](TODO:../operations-api/security.md) | v4.7.2 | +| `update_ssh_key` | Update SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| 
`delete_ssh_key` | Delete SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `list_ssh_keys` | List all SSH keys | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `set_ssh_known_hosts` | Set SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `get_ssh_known_hosts` | Get SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | +| `cluster_get_routes` | Get cluster routing information | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `cluster_network` | Get cluster network status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `cluster_status` | Get cluster status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `remove_node` | Remove node from cluster | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | +| `add_component` | Add a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `deploy_component` | Deploy a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `deploy` (alias) | Alias for `deploy_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `package_component` | Package a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `package` (alias) | Alias for `package_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `drop_component` | Remove a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `get_components` | List all components | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `get_component_file` | Get component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `set_component_file` | Set component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `install_node_modules` | Install Node.js dependencies | [Components](TODO:../operations-api/components.md) | v4.3.0 | +| `set_configuration` | 
Update configuration settings | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | +| `get_configuration` | Get current configuration | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | +| `create_authentication_tokens` | Create authentication tokens | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | +| `refresh_operation_token` | Refresh operation token | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | +| `restart_service` | Restart Harper service | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `restart` | Restart Harper instance | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `system_information` | Get system information | [System](TODO:../operations-api/system.md) | v4.3.0 | +| `registration_info` | Get registration information | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `get_fingerprint` | Get instance fingerprint | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `set_license` | Set license key | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | +| `get_usage_licenses` | Get usage and license info | [Licensing](TODO:../operations-api/licensing.md) | v4.7.3 | +| `get_job` | Get job status | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | +| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | +| `read_log` | Read application logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `read_transaction_log` | Read transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `read_audit_log` | Read audit logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `delete_transaction_logs_before` | Delete old transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | +| `purge_stream` | Purge streaming data | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | +| `delete_records_before` | Delete old records | 
[Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | +| `get_status` | Get custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| `set_status` | Set custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| `clear_status` | Clear custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | ### Command Aliases @@ -115,7 +116,7 @@ The following aliases are available for convenience: - `deploy` → `deploy_component` - `package` → `package_component` -For detailed parameter information for each operation, see the [Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Complete operations list"). +For detailed parameter information for each operation, see the [Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Complete operations list'). ## Command Examples @@ -134,6 +135,7 @@ harper describe_table database=dev table=dog ``` **Example Output**: + ```yaml schema: dev name: dog @@ -151,7 +153,7 @@ last_updated_record: 1724483231970.9949 ``` :::tip -For detailed information on database and table structures, see the [Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md "Database reference documentation"). +For detailed information on database and table structures, see the [Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md 'Database reference documentation'). ::: ### Data Operations @@ -169,7 +171,7 @@ harper search_by_value table=dog search_attribute=name search_value=harper get_a ``` :::tip -For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST API reference") and [GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md "GraphQL querying reference"). 
+For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST API reference') and [GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md 'GraphQL querying reference'). ::: ### Configuration Operations @@ -187,7 +189,7 @@ harper get_configuration ``` :::tip -For comprehensive configuration options, see the [Configuration Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration reference"). +For comprehensive configuration options, see the [Configuration Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration reference'). ::: ### Component Operations @@ -211,7 +213,7 @@ harper deploy project=my-app package=https://github.com/user/repo ``` :::tip -For more information on components and applications, see the [Components Reference](TODO:reference_versioned_docs/version-v4/components/overview.md "Components reference"). +For more information on components and applications, see the [Components Reference](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components reference'). ::: ### User and Role Operations @@ -229,7 +231,7 @@ harper list_roles ``` :::tip -For detailed information on users, roles, and authentication, see the [Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md "Security reference"). +For detailed information on users, roles, and authentication, see the [Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security reference'). 
::: ## Remote Operations @@ -267,8 +269,9 @@ harper restart target=https://server.com:9925 replicated=true ``` For more information on Harper applications and components, see: -- [Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md "Applications reference") - Application architecture and structure -- [Deploying Harper Applications](TODO:learn_link "Deploying applications guide") - Step-by-step deployment guide + +- [Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md 'Applications reference') - Application architecture and structure +- [Deploying Harper Applications](TODO:learn_link 'Deploying applications guide') - Step-by-step deployment guide ## Parameter Formatting @@ -291,7 +294,8 @@ harper search_by_id database=dev table=dog ids='["1","2","3"]' ### Object Parameters Object parameters are not supported via CLI. For operations requiring complex nested objects, use: -- The [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") via HTTP + +- The [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') via HTTP - A custom script or tool ### Boolean Parameters @@ -365,12 +369,12 @@ The following operation types are **not supported** via CLI: - File upload operations - Streaming operations -For these operations, use the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") directly via HTTP. +For these operations, use the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') directly via HTTP. 
## See Also - [CLI Overview](./overview.md) - General CLI information - [CLI Commands](./commands.md) - Core CLI commands -- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") - Operations API documentation -- [Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Operations reference") - Complete operations list +- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') - Operations API documentation +- [Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Operations reference') - Complete operations list - [CLI Authentication](./authentication.md) - Authentication details diff --git a/reference_versioned_docs/version-v4/cli/overview.md b/reference_versioned_docs/version-v4/cli/overview.md index 62978525..079322f6 100644 --- a/reference_versioned_docs/version-v4/cli/overview.md +++ b/reference_versioned_docs/version-v4/cli/overview.md @@ -33,6 +33,7 @@ Changed in: v4.7.0 The CLI command is `harper`. From v4.1.0 to v4.6.x, the command was only available as `harperdb`. Starting in v4.7.0, the preferred command is `harper`, though `harperdb` continues to work as an alias for backward compatibility. 
**Examples**: + ```bash # Modern usage (v4.7.0+) harper @@ -83,6 +84,7 @@ When Harper is running, the process identifier (PID) is stored in a file named ` **Location**: `/hdb.pid` **Example**: + ```bash # Read the PID cat /path/to/hdb/hdb.pid @@ -93,19 +95,19 @@ kill -0 $(cat /path/to/hdb/hdb.pid) # Check if process is running ## System Management Commands -| Command | Description | Available Since | -|---------|-------------|-----------------| -| `harper` | Run Harper in foreground mode (default behavior) | v4.1.0 | -| `harper run ` | Run Harper application from any directory | v4.2.0 | -| `harper dev ` | Run Harper in dev mode with auto-restart and console logging | v4.2.0 | -| `harper restart` | Restart Harper | v4.1.0 | -| `harper start` | Start Harper in background (daemon mode) | v4.1.0 | -| `harper stop` | Stop a running Harper instance | v4.1.0 | -| `harper status` | Display Harper and clustering status | v4.1.0 | -| `harper version` | Show installed Harper version | v4.1.0 | -| `harper renew-certs` | Renew Harper-generated self-signed certificates | v4.1.0 | -| `harper copy-db ` | Copy a database with compaction | v4.1.0 | -| `harper help` | Display all available CLI commands | v4.1.0 | +| Command | Description | Available Since | +| ---------------------------------- | ------------------------------------------------------------ | --------------- | +| `harper` | Run Harper in foreground mode (default behavior) | v4.1.0 | +| `harper run ` | Run Harper application from any directory | v4.2.0 | +| `harper dev ` | Run Harper in dev mode with auto-restart and console logging | v4.2.0 | +| `harper restart` | Restart Harper | v4.1.0 | +| `harper start` | Start Harper in background (daemon mode) | v4.1.0 | +| `harper stop` | Stop a running Harper instance | v4.1.0 | +| `harper status` | Display Harper and clustering status | v4.1.0 | +| `harper version` | Show installed Harper version | v4.1.0 | +| `harper renew-certs` | Renew Harper-generated self-signed 
certificates | v4.1.0 | +| `harper copy-db ` | Copy a database with compaction | v4.1.0 | +| `harper help` | Display all available CLI commands | v4.1.0 | See [CLI Commands](./commands.md) for detailed documentation on each command. @@ -113,7 +115,7 @@ See [CLI Commands](./commands.md) for detailed documentation on each command. Added in: v4.3.0 (confirmed via release notes) -The Harper CLI supports executing most operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview") directly from the command line. This includes operations that do not require complex nested parameters. +The Harper CLI supports executing most operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. This includes operations that do not require complex nested parameters. **Syntax**: `harper =` @@ -150,6 +152,7 @@ Changed in: v4.3.0 (expanded remote operations support) The CLI can execute operations on remote Harper instances by passing the `target` parameter with the HTTP address of the remote instance. 
**Authentication**: Provide credentials via: + - Parameters: `username= password=` - Environment variables: `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` @@ -180,6 +183,7 @@ harper dev /path/to/app ``` **Features**: + - Console logging for immediate feedback - Debugging enabled - Auto-restart on file changes @@ -192,5 +196,5 @@ See [CLI Commands](./commands.md) for detailed information on `harper dev` and o - [CLI Commands](./commands.md) - Detailed reference for each CLI command - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI - [CLI Authentication](./authentication.md) - Authentication mechanisms -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview") - Harper configuration options -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API") - Full operations API reference +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview') - Harper configuration options +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') - Full operations API reference diff --git a/reference_versioned_docs/version-v4/fastify-routes/overview.md b/reference_versioned_docs/version-v4/fastify-routes/overview.md index 71734645..9ce54f8d 100644 --- a/reference_versioned_docs/version-v4/fastify-routes/overview.md +++ b/reference_versioned_docs/version-v4/fastify-routes/overview.md @@ -7,10 +7,10 @@ title: Define Fastify Routes # Define Fastify Routes :::note -Fastify routes are discouraged in favor of modern routing with [Custom Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md "Resources documentation"), but remain a supported feature for backwards compatibility and specific use cases. 
+Fastify routes are discouraged in favor of modern routing with [Custom Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources documentation'), but remain a supported feature for backwards compatibility and specific use cases. ::: -Harper provides a build-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. While we generally recommend building your endpoints/APIs with Harper's [REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST interface documentation") for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. +Harper provides a built-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. While we generally recommend building your endpoints/APIs with Harper's [REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface documentation') for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): @@ -113,6 +113,7 @@ This will execute a request with Harper using the operations API. The `request.b #### requestWithoutAuthentication Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method: + - Properly handle user-submitted values, including url params.
User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. ### logger diff --git a/reference_versioned_docs/version-v4/graphql-querying/overview.md b/reference_versioned_docs/version-v4/graphql-querying/overview.md index 2285083e..d9100579 100644 --- a/reference_versioned_docs/version-v4/graphql-querying/overview.md +++ b/reference_versioned_docs/version-v4/graphql-querying/overview.md @@ -14,7 +14,7 @@ Added in: v4.4.0 (provisional) Changed in: v4.5.0 (disabled by default, configuration options) -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md "Schema definition documentation"), and for querying [Resources](./resources/overview.md). +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md 'Schema definition documentation'), and for querying [Resources](./resources/overview.md). Get started by setting `graphql: true` in `config.yaml`. This configuration option was added in v4.5.0 to allow more granular control over the GraphQL endpoint. 
diff --git a/reference_versioned_docs/version-v4/index.md b/reference_versioned_docs/version-v4/index.md index a61aa632..fbeeb997 100644 --- a/reference_versioned_docs/version-v4/index.md +++ b/reference_versioned_docs/version-v4/index.md @@ -1,2 +1,3 @@ # v4 -replace with new reference docs content \ No newline at end of file + +replace with new reference docs content diff --git a/reference_versioned_docs/version-v4/studio/overview.md b/reference_versioned_docs/version-v4/studio/overview.md index 41e2d46d..c2321d6c 100644 --- a/reference_versioned_docs/version-v4/studio/overview.md +++ b/reference_versioned_docs/version-v4/studio/overview.md @@ -13,11 +13,11 @@ Harper Local Studio is a web-based GUI that enables you to administer, navigate, It is automatically bundled with all Harper instances and is enabled by default on the Operations API port. -If you're looking for the platform as a service interface, go to [Harper Fabric](https://fabric.harper.fast) instead. +If you're looking for the platform as a service interface, go to [Harper Fabric](https://fabric.harper.fast) instead. 
## Configuration -To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio "Configuration options"): +To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio 'Configuration options'): ```yaml localStudio: diff --git a/scripts/analyze-pageview-data.mjs b/scripts/analyze-pageview-data.mjs index a529cfd6..62a33b78 100644 --- a/scripts/analyze-pageview-data.mjs +++ b/scripts/analyze-pageview-data.mjs @@ -11,256 +11,276 @@ const data = dataRaw.split('\r\n').slice(9); // remove first 9 lines // - there are no `,` characters within the header text data.shift(); // Remove header row // - nor are there `,` characters within any of the data values -const records = data.map(row => row.split(',')); +const records = data.map((row) => row.split(',')); // Parse records into objects with typed values const pages = records - .filter(row => row.length >= 2 && row[0]) // Filter out empty rows - .map(row => ({ - path: row[0], - views: parseInt(row[1]) || 0, - activeUsers: parseInt(row[2]) || 0, - })); + .filter((row) => row.length >= 2 && row[0]) // Filter out empty rows + .map((row) => ({ + path: row[0], + views: parseInt(row[1]) || 0, + activeUsers: parseInt(row[2]) || 0, + })); // Analysis Functions function getTotalViews() { - return pages.reduce((sum, page) => sum + page.views, 0); + return pages.reduce((sum, page) => sum + page.views, 0); } function getPathCount() { - return pages.length; + return pages.length; } function getViewsByRootPath() { - const rootPaths = {}; - pages.forEach(page => { - const parts = page.path.split('/').filter(Boolean); - const root = parts[0] || 'root'; - if (!rootPaths[root]) { - rootPaths[root] = { views: 0, paths: 0 }; - } - rootPaths[root].views += page.views; - rootPaths[root].paths += 1; - }); - return rootPaths; + const 
rootPaths = {}; + pages.forEach((page) => { + const parts = page.path.split('/').filter(Boolean); + const root = parts[0] || 'root'; + if (!rootPaths[root]) { + rootPaths[root] = { views: 0, paths: 0 }; + } + rootPaths[root].views += page.views; + rootPaths[root].paths += 1; + }); + return rootPaths; } function getTopPagesByViews(n) { - return [...pages].sort((a, b) => b.views - a.views).slice(0, n); + return [...pages].sort((a, b) => b.views - a.views).slice(0, n); } function getCumulativeViewPercentages() { - const sorted = [...pages].sort((a, b) => b.views - a.views); - const totalViews = getTotalViews(); - const percentages = []; - let cumulative = 0; - - sorted.forEach((page, index) => { - cumulative += page.views; - const percentage = (cumulative / totalViews) * 100; - percentages.push({ - rank: index + 1, - path: page.path, - views: page.views, - cumulativeViews: cumulative, - cumulativePercentage: percentage, - }); - }); - - return percentages; + const sorted = [...pages].sort((a, b) => b.views - a.views); + const totalViews = getTotalViews(); + const percentages = []; + let cumulative = 0; + + sorted.forEach((page, index) => { + cumulative += page.views; + const percentage = (cumulative / totalViews) * 100; + percentages.push({ + rank: index + 1, + path: page.path, + views: page.views, + cumulativeViews: cumulative, + cumulativePercentage: percentage, + }); + }); + + return percentages; } function findCoverageThresholds(percentages) { - const thresholds = [50, 75, 80, 90, 95, 99]; - const results = []; - - thresholds.forEach(threshold => { - const index = percentages.findIndex(p => p.cumulativePercentage >= threshold); - if (index !== -1) { - results.push({ - percentage: threshold, - pathCount: index + 1, - totalPaths: pages.length, - pathPercentage: ((index + 1) / pages.length * 100).toFixed(2), - }); - } - }); - - return results; + const thresholds = [50, 75, 80, 90, 95, 99]; + const results = []; + + thresholds.forEach((threshold) => { + const index = 
percentages.findIndex((p) => p.cumulativePercentage >= threshold); + if (index !== -1) { + results.push({ + percentage: threshold, + pathCount: index + 1, + totalPaths: pages.length, + pathPercentage: (((index + 1) / pages.length) * 100).toFixed(2), + }); + } + }); + + return results; } // Output Functions function printTotalStats() { - console.log('Total Statistics'); - console.log('Overview of the entire dataset'); - console.log('Results:'); - console.log(` Total Paths: ${getPathCount().toLocaleString()}`); - console.log(` Total Views: ${getTotalViews().toLocaleString()}`); - console.log(` Average Views per Path: ${(getTotalViews() / getPathCount()).toFixed(2)}`); - console.log(); + console.log('Total Statistics'); + console.log('Overview of the entire dataset'); + console.log('Results:'); + console.log(` Total Paths: ${getPathCount().toLocaleString()}`); + console.log(` Total Views: ${getTotalViews().toLocaleString()}`); + console.log(` Average Views per Path: ${(getTotalViews() / getPathCount()).toFixed(2)}`); + console.log(); } function printTopPages(n = 20) { - const top = getTopPagesByViews(n); - const totalViews = getTotalViews(); - - console.log(`Top ${n} Pages by Views`); - console.log(`The most viewed pages and their contribution to total site views`); - console.log('Results:'); - top.forEach((page, index) => { - const percentage = (page.views / totalViews * 100).toFixed(2); - console.log(` ${(index + 1).toString().padStart(2)}. ${page.path}`); - console.log(` ${page.views.toLocaleString()} views (${percentage}%)`); - }); - console.log(); + const top = getTopPagesByViews(n); + const totalViews = getTotalViews(); + + console.log(`Top ${n} Pages by Views`); + console.log(`The most viewed pages and their contribution to total site views`); + console.log('Results:'); + top.forEach((page, index) => { + const percentage = ((page.views / totalViews) * 100).toFixed(2); + console.log(` ${(index + 1).toString().padStart(2)}. 
${page.path}`); + console.log(` ${page.views.toLocaleString()} views (${percentage}%)`); + }); + console.log(); } function printRootPathAnalysis() { - const rootPaths = getViewsByRootPath(); - const totalViews = getTotalViews(); - const mainPaths = ['docs', 'release-notes', 'fabric', 'learn']; - - console.log('Views by Root Path'); - console.log('Distribution of views across top-level paths'); - console.log('Results:'); - - // Display main paths - mainPaths.forEach(root => { - const stats = rootPaths[root]; - if (stats) { - const percentage = (stats.views / totalViews * 100).toFixed(2); - console.log(` /${root}`); - console.log(` Views: ${stats.views.toLocaleString()} (${percentage}%)`); - console.log(` Paths: ${stats.paths.toLocaleString()}`); - console.log(` Avg Views/Path: ${(stats.views / stats.paths).toFixed(2)}`); - } - }); - - // List other root paths - const otherRoots = Object.keys(rootPaths) - .filter(root => !mainPaths.includes(root)) - .sort((a, b) => rootPaths[b].views - rootPaths[a].views); - - if (otherRoots.length > 0) { - console.log(' Other root paths:'); - otherRoots.forEach(root => { - const stats = rootPaths[root]; - const percentage = (stats.views / totalViews * 100).toFixed(2); - console.log(` /${root}: ${stats.views.toLocaleString()} views (${percentage}%), ${stats.paths} paths`); - }); - } - - console.log(); + const rootPaths = getViewsByRootPath(); + const totalViews = getTotalViews(); + const mainPaths = ['docs', 'release-notes', 'fabric', 'learn']; + + console.log('Views by Root Path'); + console.log('Distribution of views across top-level paths'); + console.log('Results:'); + + // Display main paths + mainPaths.forEach((root) => { + const stats = rootPaths[root]; + if (stats) { + const percentage = ((stats.views / totalViews) * 100).toFixed(2); + console.log(` /${root}`); + console.log(` Views: ${stats.views.toLocaleString()} (${percentage}%)`); + console.log(` Paths: ${stats.paths.toLocaleString()}`); + console.log(` Avg Views/Path: 
${(stats.views / stats.paths).toFixed(2)}`); + } + }); + + // List other root paths + const otherRoots = Object.keys(rootPaths) + .filter((root) => !mainPaths.includes(root)) + .sort((a, b) => rootPaths[b].views - rootPaths[a].views); + + if (otherRoots.length > 0) { + console.log(' Other root paths:'); + otherRoots.forEach((root) => { + const stats = rootPaths[root]; + const percentage = ((stats.views / totalViews) * 100).toFixed(2); + console.log(` /${root}: ${stats.views.toLocaleString()} views (${percentage}%), ${stats.paths} paths`); + }); + } + + console.log(); } function printCoverageThresholds() { - const percentages = getCumulativeViewPercentages(); - const thresholds = findCoverageThresholds(percentages); - - console.log('Coverage Analysis'); - console.log('How many paths account for X% of total views'); - console.log('Results:'); - thresholds.forEach(threshold => { - console.log(` ${threshold.percentage}% of views: ${threshold.pathCount} paths (${threshold.pathPercentage}% of all paths)`); - }); - console.log(); + const percentages = getCumulativeViewPercentages(); + const thresholds = findCoverageThresholds(percentages); + + console.log('Coverage Analysis'); + console.log('How many paths account for X% of total views'); + console.log('Results:'); + thresholds.forEach((threshold) => { + console.log( + ` ${threshold.percentage}% of views: ${threshold.pathCount} paths (${threshold.pathPercentage}% of all paths)` + ); + }); + console.log(); } function printViewCountDistribution() { - const totalViews = getTotalViews(); - const totalPaths = pages.length; - - // Create ranges for high traffic - const highTrafficRanges = []; - for (let i = 100; i < 10000; i += 50) { - highTrafficRanges.push({ min: i, max: i + 49 }); - } - - // Calculate stats for each category - const highTrafficPaths = pages.filter(p => p.views >= 100); - const mediumTrafficPaths = pages.filter(p => p.views >= 10 && p.views < 100); - const lowTrafficPaths = pages.filter(p => p.views >= 1 && 
p.views < 10); - const zeroTrafficPaths = pages.filter(p => p.views === 0); - - const highTrafficViews = highTrafficPaths.reduce((sum, p) => sum + p.views, 0); - const mediumTrafficViews = mediumTrafficPaths.reduce((sum, p) => sum + p.views, 0); - const lowTrafficViews = lowTrafficPaths.reduce((sum, p) => sum + p.views, 0); - - console.log('View Count Distribution'); - console.log('Number of paths grouped by their view count'); - console.log('Results:'); - - // High traffic breakdown - console.log(' High traffic (100+ views):'); - console.log(` Total Paths: ${highTrafficPaths.length} (${(highTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(` Total Views: ${highTrafficViews.toLocaleString()} (${(highTrafficViews / totalViews * 100).toFixed(2)}% of views)`); - console.log(` Breakdown by range:`); - - highTrafficRanges.forEach(range => { - const pathsInRange = highTrafficPaths.filter(p => p.views >= range.min && p.views <= range.max); - if (pathsInRange.length > 0) { - const viewsInRange = pathsInRange.reduce((sum, p) => sum + p.views, 0); - console.log(` ${range.min}-${range.max} views: ${pathsInRange.length} paths (${viewsInRange.toLocaleString()} views)`); - } - }); - - // Medium traffic - console.log(' Medium traffic (10-99 views):'); - console.log(` Total Paths: ${mediumTrafficPaths.length} (${(mediumTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(` Total Views: ${mediumTrafficViews.toLocaleString()} (${(mediumTrafficViews / totalViews * 100).toFixed(2)}% of views)`); - - // Low traffic - console.log(' Low traffic (1-9 views):'); - console.log(` Total Paths: ${lowTrafficPaths.length} (${(lowTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(` Total Views: ${lowTrafficViews.toLocaleString()} (${(lowTrafficViews / totalViews * 100).toFixed(2)}% of views)`); - - // Zero views - console.log(' Zero views:'); - console.log(` Total Paths: ${zeroTrafficPaths.length} 
(${(zeroTrafficPaths.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(); + const totalViews = getTotalViews(); + const totalPaths = pages.length; + + // Create ranges for high traffic + const highTrafficRanges = []; + for (let i = 100; i < 10000; i += 50) { + highTrafficRanges.push({ min: i, max: i + 49 }); + } + + // Calculate stats for each category + const highTrafficPaths = pages.filter((p) => p.views >= 100); + const mediumTrafficPaths = pages.filter((p) => p.views >= 10 && p.views < 100); + const lowTrafficPaths = pages.filter((p) => p.views >= 1 && p.views < 10); + const zeroTrafficPaths = pages.filter((p) => p.views === 0); + + const highTrafficViews = highTrafficPaths.reduce((sum, p) => sum + p.views, 0); + const mediumTrafficViews = mediumTrafficPaths.reduce((sum, p) => sum + p.views, 0); + const lowTrafficViews = lowTrafficPaths.reduce((sum, p) => sum + p.views, 0); + + console.log('View Count Distribution'); + console.log('Number of paths grouped by their view count'); + console.log('Results:'); + + // High traffic breakdown + console.log(' High traffic (100+ views):'); + console.log( + ` Total Paths: ${highTrafficPaths.length} (${((highTrafficPaths.length / totalPaths) * 100).toFixed(2)}% of paths)` + ); + console.log( + ` Total Views: ${highTrafficViews.toLocaleString()} (${((highTrafficViews / totalViews) * 100).toFixed(2)}% of views)` + ); + console.log(` Breakdown by range:`); + + highTrafficRanges.forEach((range) => { + const pathsInRange = highTrafficPaths.filter((p) => p.views >= range.min && p.views <= range.max); + if (pathsInRange.length > 0) { + const viewsInRange = pathsInRange.reduce((sum, p) => sum + p.views, 0); + console.log( + ` ${range.min}-${range.max} views: ${pathsInRange.length} paths (${viewsInRange.toLocaleString()} views)` + ); + } + }); + + // Medium traffic + console.log(' Medium traffic (10-99 views):'); + console.log( + ` Total Paths: ${mediumTrafficPaths.length} (${((mediumTrafficPaths.length / 
totalPaths) * 100).toFixed(2)}% of paths)` + ); + console.log( + ` Total Views: ${mediumTrafficViews.toLocaleString()} (${((mediumTrafficViews / totalViews) * 100).toFixed(2)}% of views)` + ); + + // Low traffic + console.log(' Low traffic (1-9 views):'); + console.log( + ` Total Paths: ${lowTrafficPaths.length} (${((lowTrafficPaths.length / totalPaths) * 100).toFixed(2)}% of paths)` + ); + console.log( + ` Total Views: ${lowTrafficViews.toLocaleString()} (${((lowTrafficViews / totalViews) * 100).toFixed(2)}% of views)` + ); + + // Zero views + console.log(' Zero views:'); + console.log( + ` Total Paths: ${zeroTrafficPaths.length} (${((zeroTrafficPaths.length / totalPaths) * 100).toFixed(2)}% of paths)` + ); + console.log(); } function printLongTailAnalysis() { - const sorted = [...pages].sort((a, b) => b.views - a.views); - const totalViews = getTotalViews(); - const totalPaths = pages.length; - - console.log('Long Tail Analysis'); - console.log('Understanding the distribution of low-traffic pages'); - console.log('Results:'); - - const singleDigitViews = sorted.filter(p => p.views < 10 && p.views > 0); - const singleDigitViewsTotal = singleDigitViews.reduce((sum, p) => sum + p.views, 0); - const singleDigitPercentage = (singleDigitViewsTotal / totalViews * 100).toFixed(2); - - console.log(` Paths with 1-9 views:`); - console.log(` Count: ${singleDigitViews.length} (${(singleDigitViews.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(` Total Views: ${singleDigitViewsTotal.toLocaleString()} (${singleDigitPercentage}% of total views)`); - - const zeroViews = sorted.filter(p => p.views === 0); - console.log(` Paths with 0 views:`); - console.log(` Count: ${zeroViews.length} (${(zeroViews.length / totalPaths * 100).toFixed(2)}% of paths)`); - console.log(); + const sorted = [...pages].sort((a, b) => b.views - a.views); + const totalViews = getTotalViews(); + const totalPaths = pages.length; + + console.log('Long Tail Analysis'); + 
console.log('Understanding the distribution of low-traffic pages'); + console.log('Results:'); + + const singleDigitViews = sorted.filter((p) => p.views < 10 && p.views > 0); + const singleDigitViewsTotal = singleDigitViews.reduce((sum, p) => sum + p.views, 0); + const singleDigitPercentage = ((singleDigitViewsTotal / totalViews) * 100).toFixed(2); + + console.log(` Paths with 1-9 views:`); + console.log( + ` Count: ${singleDigitViews.length} (${((singleDigitViews.length / totalPaths) * 100).toFixed(2)}% of paths)` + ); + console.log(` Total Views: ${singleDigitViewsTotal.toLocaleString()} (${singleDigitPercentage}% of total views)`); + + const zeroViews = sorted.filter((p) => p.views === 0); + console.log(` Paths with 0 views:`); + console.log(` Count: ${zeroViews.length} (${((zeroViews.length / totalPaths) * 100).toFixed(2)}% of paths)`); + console.log(); } function printRedirectStrategyData() { - const percentages = getCumulativeViewPercentages(); - const totalPaths = pages.length; - - console.log('Redirect Strategy Quick Reference'); - console.log('Key data points for redirect planning'); - console.log('Results:'); - - [10, 25, 50, 100, 200].forEach(n => { - if (n <= totalPaths) { - const data = percentages[n - 1]; - const percentage = data.cumulativePercentage.toFixed(2); - console.log(` Top ${n} paths: ${percentage}% of views`); - } - }); - console.log(); + const percentages = getCumulativeViewPercentages(); + const totalPaths = pages.length; + + console.log('Redirect Strategy Quick Reference'); + console.log('Key data points for redirect planning'); + console.log('Results:'); + + [10, 25, 50, 100, 200].forEach((n) => { + if (n <= totalPaths) { + const data = percentages[n - 1]; + const percentage = data.cumulativePercentage.toFixed(2); + console.log(` Top ${n} paths: ${percentage}% of views`); + } + }); + console.log(); } // Run all analyses @@ -276,4 +296,4 @@ printRedirectStrategyData(); printTopPages(25); printRootPathAnalysis(); 
printViewCountDistribution(); -printLongTailAnalysis(); \ No newline at end of file +printLongTailAnalysis(); diff --git a/sidebarsReference.ts b/sidebarsReference.ts index eb596dbb..45253956 100644 --- a/sidebarsReference.ts +++ b/sidebarsReference.ts @@ -3,8 +3,9 @@ import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'; const sidebars: SidebarsConfig = { referenceSidebar: [ { - type: 'autogenerated', dirName: '.' - } + type: 'autogenerated', + dirName: '.', + }, ], }; diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 162f394e..c0a5041e 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -15,6 +15,7 @@ This document outlines the concrete steps for migrating Harper v4 documentation ## Part 1: Initial Content Generation (AI-Driven) ### Overview + AI agents work through the migration map, creating PRs for each top-level section. Each PR adds new files without removing anything from `versioned_docs/`. ### Agent Instructions @@ -26,6 +27,7 @@ For each section in the migration map, the agent should: 3. **Read relevant release notes** from `release_notes/` for version annotations 4. **Generate new reference files** following the structure in the reference plan 5. **Add inline source comments** documenting what was used: + ```markdown @@ -33,8 +35,9 @@ For each section in the migration map, the agent should: ``` 6. 
**Use link placeholders** for cross-references that don't exist yet: + ```markdown - [JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md "Will be created in security section") + [JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md 'Will be created in security section') ``` **IMPORTANT**: After generating all files in the section, replace TODO placeholders with relative paths for internal section links: @@ -46,6 +49,7 @@ For each section in the migration map, the agent should: - Store in `migration-context/link-placeholders/` - Named by section: `cli-link-placeholders.md`, `security-link-placeholders.md`, etc. - Format: + ```markdown # Link Placeholders for [Section Name] @@ -61,6 +65,7 @@ For each section in the migration map, the agent should: ``` 8. **Add version annotations** using the strategy defined in reference plan: + ```markdown ## Relationships @@ -77,9 +82,11 @@ For each section in the migration map, the agent should: 9. **Note conflicts and uncertainties** in PR description 10. **Handle images/assets** with placeholders: + ```markdown + ![Architecture Diagram](TODO:IMAGE) ``` @@ -88,20 +95,21 @@ For each section in the migration map, the agent should: - List all pages in the appropriate order - Match the pattern from `sidebarsLearn.ts` (non-collapsible with `className: "learn-category-header"`) - Example: + ```json { - "type": "category", - "label": "CLI", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "cli/overview", - "label": "Overview" - }, - // ... - ] + "type": "category", + "label": "CLI", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "cli/overview", + "label": "Overview" + } + // ... 
+ ] } ``` @@ -113,6 +121,7 @@ For each section in the migration map, the agent should: - Use `git commit --fixup ` for subsequent changes - This allows easy squashing later while keeping development history clear - Example: + ```bash # Initial commit git add reference_versioned_docs/version-v4/cli/*.md @@ -122,6 +131,7 @@ For each section in the migration map, the agent should: git add reference_versioned_sidebars/version-v4-sidebars.json git commit --fixup HEAD ``` + - PRs will be squash-merged to maintain clean history on main branch 14. **Create PR** with comprehensive description (template below) @@ -132,9 +142,11 @@ For each section in the migration map, the agent should: # [Section Name] Migration ## Summary + Migration of [section name] documentation from versioned_docs into new reference structure. ## Files Created + - reference_versioned_docs/version-v4/[section]/overview.md - reference_versioned_docs/version-v4/[section]/page1.md - reference_versioned_docs/version-v4/[section]/page2.md @@ -142,49 +154,60 @@ Migration of [section name] documentation from versioned_docs into new reference ## Source Files Used ### reference_versioned_docs/version-v4/[section]/overview.md + - `versioned_docs/version-4.7/path/to/file.md` (primary source) - `versioned_docs/version-4.2/path/to/file.md` (for baseline features) - `release_notes/4.3.0.md` (feature introduction dates) ### reference_versioned_docs/version-v4/[section]/page1.md + - `versioned_docs/version-4.7/path/to/another.md` (primary) - ... ## Version Annotations Added ### High Confidence (Confirmed via release notes) + - Feature X: Added in v4.3.0 - Feature Y: Changed in v4.4.0 ### Needs Verification + - Feature Z: Likely added in v4.3.0 (inferred from version comparison) - Config option ABC: Possibly changed in v4.5.0 (mentioned in docs but not in release notes) ## Link Placeholders Created + See `migration-context/link-placeholders/[section]-link-placeholders.md` for complete list. 
Summary: + - 12 placeholders to operations-api section - 5 placeholders to security section - 3 placeholders to configuration section ## Images/Assets Noted + - Line 45 of overview.md: TODO-IMAGE for architecture diagram - Line 123 of page1.md: TODO-IMAGE for flow chart ## Conflicts & Questions for Human Review ### Content Conflicts + None (reference/ directory was reset) ### Uncertainties + - Unclear if Feature Z was introduced in v4.3.0 or v4.4.0 - marked for verification - Configuration option `foo.bar` mentioned in v4.5 docs but not in earlier versions or release notes ## Migration Map Status + Updated status for this section to "In Progress" ## Checklist for Human Reviewer + - [ ] Verify version annotations marked as "needs verification" - [ ] Review content accuracy and completeness - [ ] Check inline source comments are accurate @@ -239,9 +262,9 @@ Based on migration map and reference plan, recommend this order. Each section is - `api.md` - `operations.md` -6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) - - `overview.md` - - `operations.md` +6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) + - `overview.md` + - `operations.md` **Phase 1C - Complex Sections** @@ -354,18 +377,22 @@ Once all Part 1 PRs are merged, resolve link placeholders. # Link Resolution: [Section Name] ## Summary + Resolved link placeholders in [section name] now that target pages exist. ## Links Resolved + - `reference_versioned_docs/version-v4/[section]/file1.md` line 45: JWT Auth → `../security/jwt-authentication.md` - `reference_versioned_docs/version-v4/[section]/file1.md` line 67: Operations → `../operations-api/operations.md` - ... 
(X total links resolved) ## Links Unable to Resolve + - `reference_versioned_docs/version-v4/[section]/file2.md` line 123: Target `TODO:reference_versioned_docs/version-v4/foo/bar.md` doesn't exist - Recommendation: This might be a typo, should probably link to `../foo/baz.md` instead ## Checklist + - [ ] Human verify resolved links are correct - [ ] Human resolve any unresolvable links - [ ] Delete corresponding `migration-context/link-placeholders/[section]-link-placeholders.md` after merge @@ -382,6 +409,7 @@ Update other parts of documentation that reference the old structure. **Task**: Update internal links in release notes to point to new structure. **Agent Instructions**: + 1. Scan all files in `release_notes/` 2. Find links to old paths (e.g., `/docs/4.7/...`, `/docs/developers/...`) 3. Map to new paths based on migration map @@ -392,6 +420,7 @@ Update other parts of documentation that reference the old structure. **Task**: Update links in learn guides to point to new reference structure. **Agent Instructions**: + 1. Scan all files in `learn/` 2. Find links to old reference paths 3. Map to new paths @@ -402,6 +431,7 @@ Update other parts of documentation that reference the old structure. **Task**: Find and update any other references to old paths. **Agent Instructions**: + 1. Search entire repo for common old path patterns 2. Update as appropriate 3. Create PR with updates @@ -425,6 +455,7 @@ Configure redirects from old paths to new paths. ### Redirect Priority Focus on: + 1. Most visited pages (if analytics data available) 2. All `/docs/4.7/` paths (current latest) 3. Common paths across v4.2-v4.6 (many are duplicates) @@ -437,6 +468,7 @@ Focus on: ### 7.1: Orphaned Content Review **Human Task**: + 1. Review "Files Being Removed" section in migration map 2. Confirm these files are intentionally not migrated 3. 
Document decision (move to legacy, move to learn, delete entirely) @@ -444,6 +476,7 @@ Focus on: ### 7.2: Remove Old Content **After all above steps complete**: + 1. Create PR that removes old `versioned_docs/version-4.X/` folders 2. Only do this after confirming: - All content is migrated or intentionally deprecated @@ -454,6 +487,7 @@ Focus on: ### 7.3: Final Validation **Human Task**: + 1. Build documentation locally 2. Spot check various pages 3. Test redirects @@ -463,11 +497,12 @@ Focus on: ### 7.4: Merge to Main Once everything on `major-version-reorg` branch is complete: + 1. Final review of entire branch -3. Squash/organize commits if needed -4. Format -5. Merge to `main` -6. Deploy +2. Squash/organize commits if needed +3. Format +4. Merge to `main` +5. Deploy --- @@ -476,6 +511,7 @@ Once everything on `major-version-reorg` branch is complete: ### Files Agents Should Reference **Primary**: + - `v4-docs-migration-map.md` - The authoritative source for what goes where - `v4-docs-reference-plan.md` - Understanding structure and philosophy - `versioned_docs/version-4.X/**/*.md` - Source content @@ -485,6 +521,7 @@ Once everything on `major-version-reorg` branch is complete: ### Agent Constraints **DO**: + - Add new files to `reference_versioned_docs/version-v4/` - Include inline source comments - Use link placeholders with TODO: prefix @@ -494,6 +531,7 @@ Once everything on `major-version-reorg` branch is complete: - Update migration-map.md status **DO NOT**: + - Remove anything from `versioned_docs/` (wait until Part 7) - Add files to `reference/` (that's for v5 later) - Guess at version annotations without noting confidence @@ -503,30 +541,36 @@ Once everything on `major-version-reorg` branch is complete: ### Link Placeholder Format **Standard format**: + ```markdown -[Link Text](TODO:reference_versioned_docs/version-v4/section/page.md "Optional description of expected target") +[Link Text](TODO:reference_versioned_docs/version-v4/section/page.md 'Optional 
description of expected target') ``` **For images**: + ```markdown + ![Alt text](TODO:IMAGE) ``` ### Version Annotation Format **High confidence**: + ```markdown Added in: v4.3.0 ``` **Needs verification**: + ```markdown Added in: v4.3.0 (inferred from version comparison, needs verification) ``` **Changed features**: + ```markdown Changed in: v4.4.0 @@ -535,6 +579,7 @@ In previous versions: [Describe old behavior] ``` **Deprecated features**: + ```markdown Deprecated in: v4.X.0 (moved to legacy in v4.7+) diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index df51b8ca..97466770 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -18,6 +18,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## CLI Section ### `reference/cli/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: - `versioned_docs/version-4.1/cli.md` (for baseline features) @@ -30,6 +31,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Dev mode (`harperdb dev`, `harperdb run`) ### `reference/cli/commands.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: Compare all versions for command evolution - **Version Annotations**: Each command should note its introduction version @@ -39,6 +41,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Foreground mode changes ### `reference/cli/operations-api-commands.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) - **Version Annotations**: Note v4.3.0 introduction @@ -47,6 +50,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - 
[4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API ### `reference/cli/authentication.md` + - **Primary Source**: New content or extract from CLI docs - **Status**: In Progress @@ -55,6 +59,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Configuration Section ### `reference/configuration/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` - **Additional Sources**: - Current `reference/configuration.md` @@ -66,6 +71,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Developer/production mode ### `reference/configuration/options.md` + - **Primary Source**: Current `reference/configuration.md` (very comprehensive) - **Additional Sources**: Compare all version-X/deployments/configuration.md files - **Merge Required**: Yes - configuration options added across versions @@ -75,6 +81,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Release Notes**: Major config changes across many versions - see all major releases ### `reference/configuration/operations.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/configuration.md` - **Additional Sources**: Earlier versions for feature evolution - **Version Annotations**: Track when ops were added @@ -85,12 +92,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Operations API Section ### `reference/operations-api/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/index.md` - **Additional Sources**: - `versioned_docs/version-4.2/developers/operations-api/index.md` (first structured ops api section) - **Status**: Not Started ### `reference/operations-api/operations.md` + - **Primary Source**: Synthesize from all `versioned_docs/version-4.7/developers/operations-api/*.md` files - **Merge Required**: 
Yes - comprehensive list linking to primary references - **Version Annotations**: Each operation needs version introduced @@ -102,24 +111,28 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Security Section ### `reference/security/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/index.md` - **Additional Sources**: - `versioned_docs/version-4.7/developers/security/configuration.md` - **Status**: Not Started ### `reference/security/basic-authentication.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/basic-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/basic-authentication.md` - **Version Annotations**: Available since v4.1.0 - **Status**: Not Started ### `reference/security/jwt-authentication.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/jwt-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/jwt.md` - **Version Annotations**: Available since v4.1.0 - **Status**: Not Started ### `reference/security/mtls-authentication.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/mtls-auth.md` - **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` - **Version Annotations**: Added in v4.3.0 @@ -128,6 +141,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS support added ### `reference/security/certificate-management.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-management.md` - **Additional Sources**: - `versioned_docs/version-4.1/security/certificate-management.md` @@ -140,6 +154,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Certificate revocation ### `reference/security/certificate-verification.md` + - **Primary Source**: 
`versioned_docs/version-4.7/developers/security/certificate-verification.md` - **Version Annotations**: Added in v4.7.0 (OCSP support) - **Status**: Not Started @@ -147,14 +162,17 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.7.0](release-notes/v4-tucker/4.7.0.md) - OCSP support ### `reference/security/cors.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/configuration.md` - **Status**: Not Started ### `reference/security/ssl.md` + - **Primary Source**: Extract from security/configuration or certificate management docs - **Status**: Not Started ### `reference/security/users-and-roles.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/security/users-and-roles.md` - **Additional Sources**: - `versioned_docs/version-4.7/developers/operations-api/users-and-roles.md` @@ -171,6 +189,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Components Section ### `reference/components/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/components/index.md` - **Additional Sources**: - `versioned_docs/version-4.1/custom-functions/*` (for evolution context) @@ -190,6 +209,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API with dynamic reloading ### `reference/components/applications.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/components/applications.md` - **Additional Sources**: - `versioned_docs/version-4.7/developers/applications/*.md` @@ -200,6 +220,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture, NPM/GitHub deployment ### `reference/components/extension-api.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` - **Additional Sources**: Current 
`reference/components/extensions.md` - **Version Annotations**: Extension API formalized around v4.4-4.5 @@ -208,6 +229,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API ### `reference/components/plugin-api.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` - **Additional Sources**: Current `reference/components/plugins.md` - **Version Annotations**: Added in v4.6.0 @@ -221,6 +243,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Database Section ### `reference/database/overview.md` + - **Primary Source**: New content synthesizing how database system works - **Additional Sources**: - `versioned_docs/version-4.7/reference/architecture.md` @@ -231,6 +254,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Database structure changes (single file per database) ### `reference/database/schema.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/applications/defining-schemas.md` - **Additional Sources**: - `versioned_docs/version-4.7/reference/data-types.md` @@ -258,6 +282,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Vector indexing (HNSW) ### `reference/database/data-loader.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/applications/data-loader.md` - **Additional Sources**: Current `reference/data-loader.md` - **Version Annotations**: Added in v4.5.0 @@ -266,6 +291,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Data loader introduced ### `reference/database/storage-algorithm.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/storage-algorithm.md` - **Additional Sources**: Current 
`reference/storage-algorithm.md` - **Status**: Not Started @@ -273,6 +299,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Storage performance improvements, compression by default ### `reference/database/jobs.md` + - **Primary Source**: `versioned_docs/version-4.7/administration/jobs.md` - **Additional Sources**: - `versioned_docs/version-4.7/developers/operations-api/jobs.md` @@ -281,12 +308,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Status**: Not Started ### `reference/database/system-tables.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` - **Additional Sources**: Current `reference/analytics.md` - **Status**: Not Started - **Notes**: System tables for analytics and other features ### `reference/database/compaction.md` + - **Primary Source**: `versioned_docs/version-4.7/administration/compact.md` - **Additional Sources**: Current `reference/compact.md` - **Version Annotations**: Added in v4.3.0 @@ -295,6 +324,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Compact database functionality ### `reference/database/transaction.md` + - **Primary Source**: `versioned_docs/version-4.7/administration/logging/transaction-logging.md` - **Additional Sources**: - `versioned_docs/version-4.7/administration/logging/audit-logging.md` @@ -313,6 +343,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Resources Section ### `reference/resources/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` - **Additional Sources**: Current `reference/resources/` folder - **Status**: Not Started @@ -320,6 +351,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced ### 
`reference/resources/resource-api.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` - **Additional Sources**: - `versioned_docs/version-4.7/reference/resources/instance-binding.md` @@ -340,6 +372,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Resource API upgrades ### `reference/resources/global-apis.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` - **Additional Sources**: - `versioned_docs/version-4.7/reference/transactions.md` @@ -353,6 +386,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Table.getRecordCount() ### `reference/resources/query-optimization.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/resources/query-optimization.md` - **Additional Sources**: Current `reference/resources/query-optimization.md` - **Status**: Not Started @@ -364,6 +398,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Environment Variables Section ### `reference/environment-variables/overview.md` + - **Primary Source**: New content about `loadEnv` plugin - **Additional Sources**: Built-in extensions docs, configuration docs - **Version Annotations**: loadEnv added in v4.5.0 @@ -372,6 +407,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component ### `reference/environment-variables/configuration.md` + - **Primary Source**: Extract from configuration docs or components docs - **Status**: Not Started @@ -380,11 +416,13 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Static Files Section ### `reference/static-files/overview.md` + - **Primary Source**: Extract from built-in plugins/extensions documentation - **Additional Sources**: Current 
`reference/components/built-in-extensions.md` - **Status**: Not Started ### `reference/static-files/configuration.md` + - **Primary Source**: Extract from configuration docs - **Status**: Not Started @@ -393,6 +431,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## HTTP Section ### `reference/http/overview.md` + - **Primary Source**: New content about HTTP server - **Additional Sources**: Configuration docs, architecture docs - **Status**: Not Started @@ -401,6 +440,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Socket management (SO_REUSEPORT), flexible port configs ### `reference/http/configuration.md` + - **Primary Source**: Extract from `reference/configuration.md` (http section) - **Version Annotations**: - HTTP/2 support: v4.5.0 @@ -409,6 +449,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - HTTP/2 support ### `reference/http/api.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (server global) - **Additional Sources**: Current `reference/globals.md` - **Version Annotations**: @@ -422,6 +463,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## REST Section ### `reference/rest/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/rest.md` - **Additional Sources**: Current `reference/rest.md` - **Status**: Not Started @@ -429,6 +471,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - REST interface introduced ### `reference/rest/querying.md` + - **Primary Source**: Extract from REST docs and NoSQL operations - **Additional Sources**: - `versioned_docs/version-4.7/developers/operations-api/nosql-operations.md` @@ -442,17 +485,20 @@ This document maps existing documentation paths from 
`versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved URL path parsing, directURLMapping ### `reference/rest/headers.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/headers.md` - **Additional Sources**: Current `reference/headers.md` - **Version Annotations**: Track which headers were added/removed over versions - **Status**: Not Started ### `reference/rest/content-types.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/content-types.md` - **Additional Sources**: Current `reference/content-types.md` - **Status**: Not Started ### `reference/rest/websockets.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` - **Additional Sources**: Current `reference/real-time.md` - **Status**: Not Started @@ -460,6 +506,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.2.0](release-notes/v4-tucker/4.2.0.md) - WebSocket support ### `reference/rest/server-sent-events.md` + - **Primary Source**: Extract from real-time or REST docs - **Status**: Not Started - **Release Notes**: @@ -470,6 +517,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## MQTT Section ### `reference/mqtt/overview.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` - **Additional Sources**: Built-in plugins/extensions docs - **Version Annotations**: @@ -484,6 +532,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved message delivery, blob support for MQTT ### `reference/mqtt/configuration.md` + - **Primary Source**: Extract from configuration docs and real-time docs - **Version Annotations**: Port change v4.5.0 (9925 → 9933) - **Status**: Not Started @@ -495,6 +544,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Logging Section ### `reference/logging/overview.md` + - 
**Primary Source**: `versioned_docs/version-4.7/administration/logging/index.md` - **Additional Sources**: Current `reference/logging.md` (if exists) - **Status**: Not Started @@ -503,6 +553,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Major logging improvements ### `reference/logging/configuration.md` + - **Primary Source**: Extract from configuration docs - **Version Annotations**: - Per-component logging: v4.6.0 @@ -512,12 +563,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Per-component logging, dynamic reloading, HTTP logging ### `reference/logging/api.md` + - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) - **Status**: Not Started - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Logger based on Node.js Console API ### `reference/logging/operations.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` - **Status**: Not Started - **Notes**: Operations for managing standard logs (not transaction/audit logs, which moved to database section) @@ -527,6 +580,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Analytics Section ### `reference/analytics/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` - **Additional Sources**: Current `reference/analytics.md` - **Version Annotations**: @@ -538,6 +592,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.7.0](release-notes/v4-tucker/4.7.0.md) - New analytics and licensing functionality ### `reference/analytics/operations.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` - **Status**: Not Started @@ -546,6 +601,7 @@ This document maps existing documentation paths from 
`versioned_docs/version-4.X ## Replication Section ### `reference/replication/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/replication/index.md` - **Additional Sources**: Current `reference/replication/` (if exists) - **Version Annotations**: @@ -556,6 +612,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information, improved replication timestamps ### `reference/replication/clustering.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/clustering/index.md` - **Additional Sources**: - All `versioned_docs/version-4.7/reference/clustering/*.md` files @@ -570,6 +627,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information ### `reference/replication/sharding.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/replication/sharding.md` - **Version Annotations**: - Sharding: v4.4.0 @@ -584,6 +642,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## GraphQL Querying Section ### `reference/graphql-querying/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/reference/graphql.md` - **Additional Sources**: Current `reference/graphql.md` - **Version Annotations**: @@ -601,6 +660,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Studio Section ### `reference/studio/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` (localStudio configuration) - **Status**: In Progress - **Notes**: Simple overview page focusing on: @@ -617,6 +677,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Fastify Routes Section ### `reference/fastify-routes/overview.md` + - **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` - **Additional 
Sources**: Current `reference/define-routes.md` - **Status**: In Progress @@ -627,18 +688,21 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Legacy Section ### `reference/legacy/cloud/` + - **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/*` - **Additional Sources**: `versioned_docs/version-4.7/deployments/harper-cloud/*` - **Status**: In Progress - **Notes**: The primary and additional sources are to be completely removed and this section is to act as a basic landing page to direct users to Fabric instead. ### `reference/legacy/custom-functions/` + - **Primary Source**: `versioned_docs/version-4.1/custom-functions/*` - **Additional Sources**: `versioned_docs/version-4.7/developers/operations-api/custom-functions.md` - **Status**: N/A - **Notes**: Move as-is with deprecation notice pointing to Components ### `reference/legacy/sql/` + - **Primary Source**: `versioned_docs/version-4.7/reference/sql-guide/*` - **Additional Sources**: - `versioned_docs/version-4.7/developers/operations-api/sql-operations.md` @@ -651,6 +715,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ## Files Requiring Special Attention ### High Priority Merges + These files require careful merging from multiple sources: 1. 
**Configuration Options** (`reference/configuration/options.md`) @@ -678,9 +743,11 @@ These files require careful merging from multiple sources: - Significant API changes in v4.4 ### Files Being Removed/Ignored + These exist in current docs but won't exist in new structure: **To be moved to Learn guides:** + - `versioned_docs/version-4.7/administration/administration.md` - Generic admin intro - `versioned_docs/version-4.7/administration/cloning.md` - Move to Learn guide - `versioned_docs/version-4.7/developers/applications/debugging.md` - Move to Learn guide @@ -692,15 +759,17 @@ These exist in current docs but won't exist in new structure: - `versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md` - Move to Learn guide **To be ignored (obsolete content):** + - `versioned_docs/version-4.7/reference/index.md` - Generic intro page - `versioned_docs/version-4.7/reference/limits.md` - Fold into database/overview or schema - `versioned_docs/version-4.7/administration/harper-studio/` - direct users to fabric - `versioned_docs/version-4.7/deployments/harper-cloud/` - direct users to fabric ### Cross-References to Update + Files that heavily reference paths that will change: -- All operations-api/*.md files reference other sections +- All operations-api/\*.md files reference other sections - Security files cross-reference operations and configuration - Components files reference configuration and operations - Clustering files extensively cross-reference @@ -710,6 +779,7 @@ Files that heavily reference paths that will change: ## Version Annotation Checklist For each file migrated, ensure: + - [ ] Features note their introduction version - [ ] Changed behaviors note the version they changed - [ ] Deprecated features note deprecation version @@ -736,11 +806,13 @@ The `release-notes/v4-tucker/` directory contains 169 release note files coverin When adding version annotations, refer to these key features and their introduction versions: **CLI & Configuration** + - CLI 
expansion with operations API: 4.3.0 - Dev mode (`harperdb dev`): 4.2.0 - Configuration improvements: 4.3.0, 4.4.0 **Security** + - mTLS support: 4.3.0 - Dynamic certificate management: 4.4.0 - OCSP support: 4.7.0 @@ -748,6 +820,7 @@ When adding version annotations, refer to these key features and their introduct - Certificate revocation: 4.5.0 **Components & Extensions** + - Component architecture: 4.2.0 - Custom functions with worker threads: 4.1.0 - New extension API: 4.6.0 @@ -755,6 +828,7 @@ When adding version annotations, refer to these key features and their introduct - Built-in loadEnv component: 4.5.0 **Database & Schema** + - Configurable schemas (GraphQL syntax): 4.2.0 - Relationships and joins: 4.3.0 - Computed properties: 4.4.0 @@ -766,6 +840,7 @@ When adding version annotations, refer to these key features and their introduct - Auto-incrementing primary keys: 4.4.0 **Data Access** + - Resource API: 4.2.0 - CRDT support: 4.3.0 - Response object support: 4.4.0 @@ -774,6 +849,7 @@ When adding version annotations, refer to these key features and their introduct - Iterator-based queries: 4.1.0 **REST & HTTP** + - REST interface: 4.2.0 - HTTP/2 support: 4.5.0 - Improved URL path parsing: 4.5.0 @@ -781,6 +857,7 @@ When adding version annotations, refer to these key features and their introduct - Worker threads for HTTP: 4.1.0 **Real-Time & MQTT** + - MQTT support: 4.2.0 - WebSocket support: 4.2.0 - Server-Sent Events: 4.2.0 @@ -790,6 +867,7 @@ When adding version annotations, refer to these key features and their introduct - Improved message delivery: 4.5.0 **Replication & Clustering** + - Native replication (Plexus): 4.4.0 - Sharding: 4.4.0 - Expanded sharding functionality: 4.5.0 @@ -797,17 +875,20 @@ When adding version annotations, refer to these key features and their introduct - Replicated operations: 4.4.0 **Logging** + - Logging consolidated to hdb.log: 4.1.0 - Per-component logging: 4.6.0 - Dynamic logging reload: 4.6.0 - HTTP logging: 4.6.0 
**GraphQL** + - OpenAPI specification: 4.3.0 - Native GraphQL support (provisional): 4.4.0 - GraphQL disabled by default: 4.5.0 **Storage & Performance** + - Database structure (single file): 4.2.0 - Storage performance improvements: 4.3.0 - Compression by default: 4.3.0 @@ -815,10 +896,12 @@ When adding version annotations, refer to these key features and their introduct - Storage reclamation: 4.5.0 **Analytics** + - Resource and storage analytics: 4.5.0 - New analytics/licensing: 4.7.0 **Studio** + - Local studio upgrade: 4.3.0 ## Notes diff --git a/v4-docs-project-brief.md b/v4-docs-project-brief.md index 30ace1b2..1ff84317 100644 --- a/v4-docs-project-brief.md +++ b/v4-docs-project-brief.md @@ -31,6 +31,7 @@ This consolidation will improve documentation maintainability, make features mor ## Current Status ### Phase Status + - ✅ Planning & Documentation Complete - ⏸️ Team Review Pending - ⏳ Pilot Execution Not Started @@ -40,6 +41,7 @@ This consolidation will improve documentation maintainability, make features mor ### Sections Status (0/20 Complete) **Phase 1A - Simple** (0/5) + - [ ] CLI - [ ] Content Types - [ ] Headers @@ -47,6 +49,7 @@ This consolidation will improve documentation maintainability, make features mor - [ ] Studio **Phase 1B - Medium** (0/7) + - [ ] Security - [ ] Environment Variables - [ ] Static Files @@ -56,6 +59,7 @@ This consolidation will improve documentation maintainability, make features mor - [ ] Analytics **Phase 1C - Complex** (0/5) + - [ ] REST - [ ] Replication - [ ] Database @@ -63,13 +67,16 @@ This consolidation will improve documentation maintainability, make features mor - [ ] Components **Phase 1D - Cross-Cutting** (0/2) + - [ ] Operations API - [ ] Configuration **Phase 1E - Legacy** (0/1) + - [ ] Legacy Content ### Metrics + - **PRs Opened**: 0/20 - **PRs Merged**: 0/20 - **Link Placeholders Created**: 0 @@ -82,36 +89,43 @@ This consolidation will improve documentation maintainability, make features mor ## Key Decisions 
Log ### 2026-02-18: Initial Planning + - **Decision**: Use VSCode + Claude Code approach (vs fully automated Agent SDK) - **Rationale**: Provides visibility and control; can pivot to automation if needed - **Impact**: Requires manual orchestration but allows quality validation throughout ### 2026-02-18: Target Directory + - **Decision**: Output to `reference_versioned_docs/version-v4/` not `reference/` - **Rationale**: Clean separation; `reference/` will be used for v5 kickstart later - **Impact**: Additional step required later to copy to `reference/` for v5 ### 2026-02-18: Transaction Logging Reorganization + - **Decision**: Move transaction/audit logging from `logging/` to `database/` - **Rationale**: Transaction logging is a database-level concern, not application logging - **Impact**: Better conceptual organization; `logging/` focuses on app/system logs ### 2026-02-18: Link Placeholder Strategy + - **Decision**: Use `TODO:path` format in actual markdown links with per-section tracker files - **Rationale**: Easy to find/replace, works with markdown parsers, no merge conflicts - **Impact**: Separate cleanup phase needed to resolve placeholders ### 2026-02-18: Complete Sections in Single PRs + - **Decision**: Don't split large sections (like Configuration) into multiple PRs - **Rationale**: Easier to review section holistically; context is preserved - **Impact**: Some PRs will be large but provide complete picture ### 2026-02-18: Pilot-First Approach + - **Decision**: Run CLI and Security as pilots before scaling - **Rationale**: Validate quality and process before committing to full migration - **Impact**: Adds ~2-3 days upfront but reduces risk of rework ### 2026-02-19: Temporary Build Simplifications + - **Decision**: Temporarily disable local search plugin and set `onBrokenLinks: 'warn'` - **Rationale**: Allows build to succeed during migration while reference docs are being populated - **Impact**: Must remember to re-enable before merging to main: @@ -127,22 
+141,25 @@ This consolidation will improve documentation maintainability, make features mor ## Known Issues & Blockers ### Current Blockers -*None - ready to begin execution* + +_None - ready to begin execution_ ### Potential Risks + 1. **Version annotation accuracy** - AI might infer wrong introduction versions - - *Mitigation*: Confidence levels + human verification + release notes validation + - _Mitigation_: Confidence levels + human verification + release notes validation 2. **Content quality variability** - Some sections might need significant editing - - *Mitigation*: Pilot sections first; refine prompts based on learnings + - _Mitigation_: Pilot sections first; refine prompts based on learnings 3. **Review capacity** - Team might be overwhelmed by 20 large PRs - - *Mitigation*: Flexible timeline; can slow down review velocity as needed + - _Mitigation_: Flexible timeline; can slow down review velocity as needed 4. **Link placeholder confusion** - Placeholders might be unclear or incorrect - - *Mitigation*: Clear format specification; dedicated cleanup phase + - _Mitigation_: Clear format specification; dedicated cleanup phase ### Watch Items + - [ ] Current `reference/` and `reference_versioned_docs/version-v4/` directories are empty (confirmed reset) - [ ] All planning documents are up to date - [ ] Team has capacity for 2-3 PR reviews per day @@ -153,6 +170,7 @@ This consolidation will improve documentation maintainability, make features mor ## Upcoming Milestones ### Next Steps (Immediate) + 1. **Present to team** - Review all planning docs, get feedback and buy-in 2. **Environment setup** - Verify VSCode, Claude Code, gh CLI ready 3. **Create tracking issue** - Set up GitHub issue for progress tracking @@ -160,18 +178,21 @@ This consolidation will improve documentation maintainability, make features mor 5. 
**Team sync** - Review pilot results, refine approach ### Near-Term Milestones (Next 2 Weeks) + - [ ] Pilot sections complete (CLI + Security) - [ ] Decision on scaling approach (continue VSCode or build automation) - [ ] Phase 1A complete (5 simple sections) - [ ] Phase 1B started (medium complexity sections) ### Medium-Term Milestones (Next 4 Weeks) + - [ ] All 20 sections have PRs merged - [ ] Link resolution complete - [ ] Cross-references updated - [ ] Sidebars configured ### Long-Term Milestones (Next 6 Weeks) + - [ ] Redirects configured - [ ] Old versioned_docs removed - [ ] Final validation complete @@ -184,6 +205,7 @@ This consolidation will improve documentation maintainability, make features mor **Project Goal**: Migrate v4 docs from `versioned_docs/version-4.X/` → `reference_versioned_docs/version-v4/` with restructuring. **Your Role**: Generate initial content drafts by: + 1. Reading migration map entry for assigned section 2. Reading all source files listed (primary + additional) 3. Reading release notes for version info @@ -192,6 +214,7 @@ This consolidation will improve documentation maintainability, make features mor 6. Creating branch, committing, opening PR **Key Constraints**: + - ✅ DO add files to `reference_versioned_docs/version-v4/` - ✅ DO include inline source comments - ✅ DO use `TODO:path` format for link placeholders @@ -201,6 +224,7 @@ This consolidation will improve documentation maintainability, make features mor - ❌ DON'T guess at version dates without noting uncertainty **Key Files to Reference**: + - `v4-docs-migration-map.md` - Your primary instruction source (which files to read, where to write) - `v4-docs-implementation-plan.md` - Detailed agent instructions (Part 1) - `v4-docs-reference-plan.md` - Target structure and philosophy @@ -209,6 +233,7 @@ This consolidation will improve documentation maintainability, make features mor **PR Template**: See `v4-docs-implementation-plan.md` Part 1 for complete template. 
**Success Criteria**: + - All files in correct location with proper structure - Inline source comments on all content - Version annotations with confidence levels @@ -221,20 +246,24 @@ This consolidation will improve documentation maintainability, make features mor ## Team Assignments ### Project Lead + - **Name**: Ethan - **Responsibilities**: Overall coordination, decision making, pilot execution ### Reviewers -*TBD after team discussion* + +_TBD after team discussion_ ### Execution Assignments -*To be determined after pilot phase* + +_To be determined after pilot phase_ --- ## Notes & Learnings ### Planning Phase Insights + - Horizontal consolidation (v4.1→v4.7) + vertical reorganization (role-based→feature-based) are parallel transformations - Starting with v4.7 as base and annotating backwards is more efficient than building forward from v4.1 - Migration map revealed several complex merges (Configuration 59KB, Schema from 5+ files, Clustering 10+ files) @@ -242,19 +271,23 @@ This consolidation will improve documentation maintainability, make features mor - Current `reference/` folder was already partially reorganized (work in progress) ### Process Improvements -*To be filled in as we learn from pilots and execution* + +_To be filled in as we learn from pilots and execution_ ### Template Refinements -*To be filled in as we refine prompts based on pilot results* + +_To be filled in as we refine prompts based on pilot results_ ### Common Issues -*To be filled in as patterns emerge during execution* + +_To be filled in as patterns emerge during execution_ --- ## Change Log ### 2026-02-18 - Project Initialization + - Created all planning documents - Completed migration map (20 sections, ~100+ files mapped) - Defined reference structure and philosophy @@ -266,18 +299,21 @@ This consolidation will improve documentation maintainability, make features mor ## Future Considerations ### Post-Migration Tasks (Out of Scope for Now) + - Copy content from 
`reference_versioned_docs/version-v4/` to `reference/` to kickstart v5 - Begin v5 documentation structure planning - Consider automation for future minor version consolidations - Evaluate if this approach works for v3 historical docs ### Process Improvements for Next Time + - Could build Agent SDK automation upfront if this approach proves successful - Template-based content generation for consistent structure - Automated version annotation extraction from git history - Automated redirect generation from sitemap analysis ### Documentation Enhancements + - Consider adding diagrams/flowcharts to planning docs - Video walkthrough of the process for future team members - Automated progress dashboard from migration map status fields @@ -287,6 +323,7 @@ This consolidation will improve documentation maintainability, make features mor ## Quick Reference ### Directory Structure + ``` documentation/ ├── versioned_docs/ @@ -306,6 +343,7 @@ documentation/ ``` ### Common Commands + ```bash # Switch to migration branch git checkout major-version-reorg @@ -327,6 +365,7 @@ grep "Status:" v4-docs-migration-map.md ``` ### Key Metrics to Track + - Sections complete: `X/20` - PRs open: `X` - PRs merged: `X` @@ -339,12 +378,14 @@ grep "Status:" v4-docs-migration-map.md ## Questions & Decisions Needed ### Before Pilot + - [ ] Team reviewed all planning docs? - [ ] Reviewers assigned for pilot sections? - [ ] GitHub tracking issue created? - [ ] Environment setup verified? ### After Pilot + - [ ] Is content quality acceptable? - [ ] Are version annotations accurate? - [ ] Is link placeholder format working? @@ -352,18 +393,21 @@ grep "Status:" v4-docs-migration-map.md - [ ] Any prompt refinements needed? ### Before Scaling + - [ ] Pilot learnings documented? - [ ] Prompts refined based on pilot? - [ ] Review assignments made? - [ ] Ready to open 15-18 more PRs? ### Before Cleanup + - [ ] All sections merged? - [ ] Ready to start link resolution? - [ ] Any orphaned content to address? 
- [ ] Ready to configure sidebars/redirects? ### Before Merge to Main + - [ ] All cleanup phases complete? - [ ] Documentation builds successfully? - [ ] Redirects tested? diff --git a/v4-docs-reference-plan.md b/v4-docs-reference-plan.md index a429b78d..6b03eb9b 100644 --- a/v4-docs-reference-plan.md +++ b/v4-docs-reference-plan.md @@ -90,6 +90,7 @@ Since we're consolidating v4.1 through v4.7 into a unified v4 reference, we need ### Annotation Patterns **For new features:** + ```markdown ## Relationships @@ -99,6 +100,7 @@ The `@relation` directive allows you to define relationships between tables... ``` **For changed features:** + ```markdown ### Auto-increment Primary Keys @@ -109,6 +111,7 @@ In previous versions, only GUIDs were supported for `ID` and `String` types. ``` **For deprecated features:** + ```markdown ## SQL Querying @@ -120,15 +123,18 @@ for modern alternatives. ``` **For configuration options:** + ```markdown ## Logging Configuration ### `logger.level` + - Type: `string` - Default: `"info"` - Added in: v4.1.0 ### `logger.per_component` + - Type: `object` - Default: `{}` - Added in: v4.6.0 @@ -149,6 +155,7 @@ Allows granular logging configuration per component or plugin. ### Building Version History When migrating content: + 1. Start with v4.7 documentation as the base (most current) 2. Compare with earlier versions (v4.6 → v4.5 → ... → v4.1) to identify when features appeared 3. Use release notes to validate feature introduction versions @@ -271,7 +278,7 @@ reference/ │ │ # are very similar but with the `loadAsInstance` thing have different signatures. │ │ # Easiest to stick to that model until we can simplify in future majors. │ │ -│ ├── global-apis.md # `tables`, `databases`, `transactions` etc. +│ ├── global-apis.md # `tables`, `databases`, `transactions` etc. │ │ # `server` has its own section so mention and link. 
│ │ │ └── query-optimization.md # Query optimization details and best practices @@ -347,7 +354,7 @@ reference/ │ ├── custom-functions/ # Custom functions (deprecated in favor of components) │ - └── sql/ # SQL guide (discouraged) + └── sql/ # SQL guide (discouraged) ``` ## Redirects diff --git a/v4-docs-research.md b/v4-docs-research.md index 40199a26..a41c57f8 100644 --- a/v4-docs-research.md +++ b/v4-docs-research.md @@ -1,10 +1,10 @@ # v4 Docs Map -The point of this is to figure out the documented evolution of Harper features and subsystems from v4.1 to v4.7 in order to create a holistic solution for reorganized singular major v4 versioned docs. +The point of this is to figure out the documented evolution of Harper features and subsystems from v4.1 to v4.7 in order to create a holistic solution for reorganized singular major v4 versioned docs. This is aligned with renaming `docs/` to `reference/` and furthering the ideal separation of prose content into `learn/`. -The structure I'm hoping to work towards is a single `reference/` folder layout that ideally has any _active_ features top-level. Then if there are legacy, deprecated, or even removed features (from a latest version), they will be documented in some sub-path such as `reference/legacy/` or something like that. +The structure I'm hoping to work towards is a single `reference/` folder layout that ideally has any _active_ features top-level. Then if there are legacy, deprecated, or even removed features (from a latest version), they will be documented in some sub-path such as `reference/legacy/` or something like that. When a feature has changed over time and some part of it is still active, but some other aspect has been deprecated; that feature should still live at top-level, but then that specific detail will be indicated as legacy/deprecated/removed. 
@@ -17,19 +17,20 @@ My plan is to go through versioned docs folders one by one and try to logically ## v4.1 Top Level sections include: + - Installation - Replaced by both newer installation instructions and learn guides - Getting Started - Replaced entirely by learn guides -- External API docs (api.harperdb.io) which has since been redirected to operations api +- External API docs (api.harperdb.io) which has since been redirected to operations api - Studio - We'll keep this around for now since it is still shipped with v4 - Cloud - No longer need to support these pages; cloud has been fully replaced by fabric - Security - - Lots of these pages still exist today but have been updated. - - In general I don't think there is anything in here that would be version specific; other than the larger concepts. - - This section has "JWT" and Certificate Management. in later harper versions we've likely added to those and so we'll detail that version specificity in the respective pages. + - Lots of these pages still exist today but have been updated. + - In general I don't think there is anything in here that would be version specific; other than the larger concepts. + - This section has "JWT" and Certificate Management. in later harper versions we've likely added to those and so we'll detail that version specificity in the respective pages. - Like there should be something that dictates that JWT support has existed as early as v4.1 - But say some other scheme (Cookies?) 
didn't work until whatever minor version - Clustering @@ -77,7 +78,7 @@ Top Level sections include: - Data Types - This becomes schema docs in the future - Dynamic Schema - - This becomes schema docs in the future + - This becomes schema docs in the future - Headers - `server-timing`, `hdb-response-time`, `content-type` - Limits @@ -99,7 +100,7 @@ First time pages have been nested into top-level sections Getting Started, Devel - Guide like that has been / will be replaced by Learn content - Subsections: - Caching - - This is a key feature + - This is a key feature - Debugging - This isn't necessarily a reference page; replaceable by Learn guide and cross links from configuration page (thread.debug) to Learn guide focussing on debugging - Fastify Routes @@ -116,19 +117,19 @@ First time pages have been nested into top-level sections Getting Started, Devel - This obviously will continue to have its own top-level section which will properly encapsulate applications, plugins, etc. (in-fact we already have the start of this in docs/reference/components now so we'll build off of that) - Operations API - First time having its own standalone section containing sub pages for all operations api types - - Likely want to retain something like this and ensure this is the single source of reference for all operations apis. feature pages should link to this directly + - Likely want to retain something like this and ensure this is the single source of reference for all operations apis. feature pages should link to this directly - Real-Time - This page still exists today in a similar fashion - - Need to consider making this nested i think and having dedicated pages for MQTT, WS, etc. - - Similar to ongoing idea below, likely want to have detailed ops/config info for any of these core features in their own reference section that parallels and links to/from other pages like a general overview pages. 
Akin to the general config or ops api page ideas, we could have another one for Real-Time that succinctly details the subsystems available, but then links out to reference and learn content depending what user wants. + - Need to consider making this nested i think and having dedicated pages for MQTT, WS, etc. + - Similar to ongoing idea below, likely want to have detailed ops/config info for any of these core features in their own reference section that parallels and links to/from other pages like a general overview pages. Akin to the general config or ops api page ideas, we could have another one for Real-Time that succinctly details the subsystems available, but then links out to reference and learn content depending what user wants. - nonetheless things like mqtt is a standalone plugin; document it as such - - but something like ws isn't exactly; its a feature of REST so ensure its appropriately organized by the plugin and well referenced for other sensible locations. + - but something like ws isn't exactly; its a feature of REST so ensure its appropriately organized by the plugin and well referenced for other sensible locations. - In this regard we may not need a top-level "Real Time" page. These specific features MQTT, WS, do deserve detailed reference pages and sections, but we don't have to arbitrarily group them like this. - REST - should remain top level but is truly a built-in plugin. can be structured like other plugin docs - may need to think through how to incorporate all the configuration and subfeatures of this. like ws and interconnectedness with Resource API and things like content types. this goes back to the organization of information problem that this could live under an umbrella like "HTTP" or "Networking", but is there value in having higherlevel pages or can we just list this top-level along with everything else - Security - - This might exist in v4.1 but aligned with some of the current thinking, this section has a "Configuration" page ... 
is this more like what we want out of dedicated sections for features and then having detailed subsections for similar parts? + - This might exist in v4.1 but aligned with some of the current thinking, this section has a "Configuration" page ... is this more like what we want out of dedicated sections for features and then having detailed subsections for similar parts? - Instead of having a whole `security/configuration` page, I believe this could live in a root, or the relative configurations should go into a more specific topic. like `security/cors` and that can contain general reference as well as specific configuration info - Otherwise, seeing some trend of existing feature scope here like Basic auth, Cert mgmt, JWT, and Users & Roles - So just like other places; we likely don't need to lump these all into a "Security" subsection and they could just have their own top-level reference topic. @@ -144,7 +145,7 @@ First time pages have been nested into top-level sections Getting Started, Devel - same as v4.1 page; should just exist top level or be completely folded into operations api - Logging - nested all three "Audit", "Standard", and "Transaction" - - again, why nest? and furthermore, most of these pages are just operations reference. + - again, why nest? and furthermore, most of these pages are just operations reference. - Deployments - Configuration File - Good start to an important reference page. as i've written else where, I likely want to have a configuration page be more general and then list out all options but link out to specific pages for detailed description and usage patterns. @@ -156,12 +157,12 @@ First time pages have been nested into top-level sections Getting Started, Devel - this is a learn guide now; any other info should be included else where like configuration page (in a subsection about say necessary installation variables or the like) - The "On Linux" subpage should be a learn guide if its even still relevant. 
- Upgrade - - likely can be removed or more simply retained. not as much upgrade info today. + - likely can be removed or more simply retained. not as much upgrade info today. - if there is actually some sort of api feature then it can documented in reference. but its just behavior of installation or something then absolutely simplify - Reference - - Many of the following subsections can exist as is; this is the basis for what we want this whole `/docs` section to become. + - Many of the following subsections can exist as is; this is the basis for what we want this whole `/docs` section to become. - Analytics - - this is just a table; theres a few of these "system" tables that we could detail somewhere more technically + - this is just a table; theres a few of these "system" tables that we could detail somewhere more technically - Architecture - high level info that would fit better in an earlier page or in something like applications - new learn content already has this info in it. @@ -177,8 +178,8 @@ First time pages have been nested into top-level sections Getting Started, Devel - Data Types - same as before; should be folded into a schema reference - Dynamic Schema - - as early as v4.2 we have this information disorganization where the user needs to read multiple different pages to even understand what the schema system is made of. if they missed the "defining schema" guide early on then this page and the previous make little sense. - - schemas system needs a detailed reference page! + - as early as v4.2 we have this information disorganization where the user needs to read multiple different pages to even understand what the schema system is made of. if they missed the "defining schema" guide early on then this page and the previous make little sense. + - schemas system needs a detailed reference page! 
- Globals - beginning of some js api reference docs that are important for extensions (at this time), but now applications and plugins - Headers @@ -192,7 +193,7 @@ First time pages have been nested into top-level sections Getting Started, Devel - Storage Algorithm - useful technical info; where is this today? Could likely be apart of a larger "DB" section or something or just "Additional Technical Details" as it doesn't have too much relevant info for app or even plugin devs. - Transactions - - is this another global api? + - is this another global api? - need to see what the state of this is today and ensure its represented in appropriate places like globals page - now maybe global page needs to be high level and we need separate pages for each api within it too? like logger could exist in logger of course. all the server stuff could exist in a Networking or simply "Server" part. @@ -272,12 +273,12 @@ There is really only one new file in v4.5, `reference/blob.md`, but the list of - Default replication port was changed from 9925 to 9933 - Expanded property access even if they aren't defined in a schema - Storage reclamation (more of a platform feature than any kind of api) -- Expanded sharding functionality +- Expanded sharding functionality - Certificate revocation in clustering - Built-in `loadEnv` plugin for environment variable loading - `cluster_status` operation updates - Improved URL path parsing for resources -- `server.authenticateUser` API +- `server.authenticateUser` API - HTTP/2 support fo api endpoints (`http2` option) - transactions can now be reused after calling `transaction.commit()` - GraphQL query endpoint can be configured to listen on different ports; its also now disabled by default to avoid conflicts @@ -306,7 +307,8 @@ In addition to that new features include: Only one new file; `'developers/security/certificate-verification.md'` Feature list much smaller: -- individual component status monitoring + +- individual component status monitoring 
- OCSP support - new analytics and licensing functionality (for Fabric) - Plugin API changes @@ -315,12 +317,12 @@ Feature list much smaller: From early on (v4.1) many features were fully controlled by ops apis. And at first they were presented based on the feature at hand. Like "Clustering", "Custom Functions", etc. and within the docs for that feature it included whatever relevant ops apis were needed. This makes me think that while we should have a technical reference for _all_ operations apis, it may be valuable to also associated specific ops apis with their relative feature. Like how is a user supposed to know if they want to do _clustering_ that they need to first look "ops apis"? Having a top level "Clustering" is valuable. That said; this is in part what the Learn section is meant to solve. Users should learn about how to Clustering via Learn guides. And then they can click through to reference pages for any other information. We also have Search in order to discover whatever specific ops apis. I think organizing the ops apis under an "Operations APIs" section is still correct but we should ensure discoverability. Maybe we don't nest it and just have them all viewable by default as soon as someone is looking at the left sidebar in Reference. -Just from reviewing v4.1 docs it is starting to show ideal core systems to document such as CLI, Operations API, Configuration, Schemas, Logging. Like the previous paragraph stated, some thought needs to be given to how information is organized. Logger is a great example of having configuration details, usage details, and API reference details. So should all of that exist under "Logging" or should it be spread out between sections? I think the reality is we'll need a bit of "both". Where there should be top-level sections "Configuration" and "Logging". 
Under configuration, it should have the general info about the config file and snake_case mapping to CLI options or operations API values, and it should list out all available configuration properties in a structure way (think JSON schema). Include short descriptions, but for any actual detail around say the `logger` section, it should link out to the Logging section for further information. Like expanded descriptions for example. Additionally, any "guide" or usage like info should be delegated to learn guides. But with this thinking; how should operations apis be documented? +Just from reviewing v4.1 docs it is starting to show ideal core systems to document such as CLI, Operations API, Configuration, Schemas, Logging. Like the previous paragraph stated, some thought needs to be given to how information is organized. Logger is a great example of having configuration details, usage details, and API reference details. So should all of that exist under "Logging" or should it be spread out between sections? I think the reality is we'll need a bit of "both". Where there should be top-level sections "Configuration" and "Logging". Under configuration, it should have the general info about the config file and snake_case mapping to CLI options or operations API values, and it should list out all available configuration properties in a structure way (think JSON schema). Include short descriptions, but for any actual detail around say the `logger` section, it should link out to the Logging section for further information. Like expanded descriptions for example. Additionally, any "guide" or usage like info should be delegated to learn guides. But with this thinking; how should operations apis be documented? 
-Should we simplify Ops Api section to include general ops api info (requests, endpoints, w/e), and then have a table/list of available (and deprecated) ops apis with short descriptions and then links out to other docs (related to the respective feature) that details the op? +Should we simplify Ops Api section to include general ops api info (requests, endpoints, w/e), and then have a table/list of available (and deprecated) ops apis with short descriptions and then links out to other docs (related to the respective feature) that details the op? Could we introduce some form of a "tag" system for pages? This could help with information organization as we could get rid of top-level pages like "Real-Time" or "Security" and just tag relevant sections based on some of those top-level topics. We could incorporate these tags into search or even some of navigation mechanism. This may be more satisfactory of a compromise for self-navigation. Its simpler than trying to come up with overly organized top-level sections, and is better than search (though AI search would definitely trump this). I think a fundamental issue is that users still are hesitant to use search since its traditionally such a poor experience. Now with AI baked in its improved tremendously but still users aren't gravitating towards it. Many are simply used to self-navigating and so we need to find some compromise. Going back to concept of "tags", idk if that necessarily solves that problem unless we introduce a more interactive search page. I think i'd rather just ensure that searching `"networking"` will actually return pages like HTTP, REST, MQTT, w/e. As I make my way through later v4 minors (4.3, 4.4, 4.5) its starting to show how the docs structure from as early as 4.2 doesn't change all too much. 
If I can sufficiently map out the top-level features to document, then come up with a reasonable format/structure for pages (like how to actually detail changes over versions), we should be in a really good place. Overall we'll significantly simplify the reference docs and make it much easier to maintain going into v5. We'll meet our obligation to provide "support" for existing v4 minors since we'll have changes documented. We've done an excellent job not breaking any apis over the development of v4 so in theory there shouldn't be much concern if say a v4.5 user was reading v4 docs which are more representative of latest v4.7 information but also contain notes about how things had changed for any particular part from v4.5 to v4.6 and beyond. -The real challenge in all of this is to figure out the high-level organization of information. I've flip-flopped a bit between high-level general pages and how everything should be organized, but I think through a lot of this it seems apparent we should document individual plugins and features thus the docs will logically map to the implementation. There will obviously be some cross-cuts, but i think organizing by feature makes the most sense. \ No newline at end of file +The real challenge in all of this is to figure out the high-level organization of information. I've flip-flopped a bit between high-level general pages and how everything should be organized, but I think through a lot of this it seems apparent we should document individual plugins and features thus the docs will logically map to the implementation. There will obviously be some cross-cuts, but i think organizing by feature makes the most sense. 
From c204af0293fe8bec4aed122a26a57bc04762425a Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 13:10:32 -0700 Subject: [PATCH 11/51] update map progress statuses --- v4-docs-migration-map.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 97466770..5a117cc6 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -25,7 +25,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/harper-cli.md` (if exists) - **Merge Required**: Yes - CLI commands added across versions - **Version Annotations**: Track command additions from v4.1 → v4.7 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API commands - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Dev mode (`harperdb dev`, `harperdb run`) @@ -35,7 +35,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: Compare all versions for command evolution - **Version Annotations**: Each command should note its introduction version -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Foreground mode changes @@ -45,14 +45,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` - **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) - **Version Annotations**: Note v4.3.0 introduction -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API ### 
`reference/cli/authentication.md` - **Primary Source**: New content or extract from CLI docs -- **Status**: In Progress +- **Status**: Complete --- @@ -648,7 +648,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Added: v4.4.0 (experimental) - Disabled by default: v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Notes**: Mark as experimental/incomplete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - OpenAPI specification endpoint @@ -662,7 +662,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ### `reference/studio/overview.md` - **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` (localStudio configuration) -- **Status**: In Progress +- **Status**: Complete - **Notes**: Simple overview page focusing on: - How to configure/enable local Studio (localStudio.enabled in config) - How to access local Studio (http://localhost:9926) @@ -680,7 +680,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` - **Additional Sources**: Current `reference/define-routes.md` -- **Status**: In Progress +- **Status**: Complete - **Notes**: Discouraged in favor of modern routing with components, but still a supported feature. --- @@ -691,7 +691,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/*` - **Additional Sources**: `versioned_docs/version-4.7/deployments/harper-cloud/*` -- **Status**: In Progress +- **Status**: Complete - **Notes**: The primary and additional sources are to be completely removed and this section is to act as a basic landing page to direct users to Fabric instead. 
### `reference/legacy/custom-functions/` From 9bee99ed22f88b579d6409de36d8dc164cd45084 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 24 Feb 2026 14:21:09 -0700 Subject: [PATCH 12/51] update workflow to do preview deploys for new file paths --- .github/workflows/pr-preview.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr-preview.yaml b/.github/workflows/pr-preview.yaml index c79617af..25e5b44c 100644 --- a/.github/workflows/pr-preview.yaml +++ b/.github/workflows/pr-preview.yaml @@ -4,6 +4,10 @@ on: pull_request_target: types: [opened, synchronize, reopened, closed] paths: + - 'reference/**' + - 'reference_versioned_docs/**' + - 'reference_versioned_sidebars/**' + - 'reference_versions.json' - 'docs/**' - 'fabric/**' - 'learn/**' From cd47bee3d2bc5e48c4fe88d5b7f56bb9a5b1c20f Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Wed, 25 Feb 2026 08:20:13 -0700 Subject: [PATCH 13/51] Environment Variables Migration (#444) * docs: migrate Environment Variables section to v4 consolidated reference Adds the Environment Variables section focused on the built-in `loadEnv` plugin. Harper-level env var configuration (naming conventions, HDB_CONFIG, HARPER_DEFAULT_CONFIG, HARPER_SET_CONFIG) is intentionally deferred to the Configuration section, with research notes captured in the migration map. 
Co-Authored-By: Claude Sonnet 4.6 * Update overview.md Co-authored-by: Chris Barber * adjust intro * fix titles --------- Co-authored-by: Claude Sonnet 4.6 Co-authored-by: Chris Barber --- ...environment-variables-link-placeholders.md | 19 +++++ .../environment-variables/overview.md | 77 +++++++++++++++++++ .../version-v4-sidebars.json | 13 ++++ v4-docs-migration-map.md | 17 ++-- 4 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 migration-context/link-placeholders/environment-variables-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/environment-variables/overview.md diff --git a/migration-context/link-placeholders/environment-variables-link-placeholders.md b/migration-context/link-placeholders/environment-variables-link-placeholders.md new file mode 100644 index 00000000..8943b2ec --- /dev/null +++ b/migration-context/link-placeholders/environment-variables-link-placeholders.md @@ -0,0 +1,19 @@ +# Link Placeholders for Environment Variables + +## reference_versioned_docs/version-v4/environment-variables/overview.md + +- Line 14: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Intro list item — "Harper configuration via environment variables — see Configuration" + - Target should be: Configuration section overview page + +- Line 67: `[Resource Extension](TODO:reference_versioned_docs/version-v4/components/overview.md)` + - Context: Describing that `loadEnv` is a Resource Extension supporting standard `files`/`urlPath` config options + - Target should be: Components overview page (covering the Resource Extension concept) + +- Line 79: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md)` + - Context: "Related" section + - Target should be: Main components/extensions reference page + +- Line 80: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: "Related" section — linking to where env var 
config details live + - Target should be: Configuration section overview page diff --git a/reference_versioned_docs/version-v4/environment-variables/overview.md b/reference_versioned_docs/version-v4/environment-variables/overview.md new file mode 100644 index 00000000..2b66a486 --- /dev/null +++ b/reference_versioned_docs/version-v4/environment-variables/overview.md @@ -0,0 +1,77 @@ +--- +id: overview +title: Environment Variables +--- + + + + + +Harper supports loading environment variables in Harper applications `process.env` using the built-in `loadEnv` plugin. This is the standard way to supply secrets and configuration to your Harper components without hardcoding values. `loadEnv` does **not** need to be installed as it is built into Harper and only needs to be declared in your `config.yaml`. + +:::note +If you are looking for information on how to configure your Harper installation using environment variables, see [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration section overview, including environment variable configuration') section for more information. +::: + +## Basic Usage + +```yaml +loadEnv: + files: '.env' +``` + +This loads the `.env` file from the root of your component directory into `process.env`. + +## Load Order + +> **Important:** Specify `loadEnv` first in your `config.yaml` so that environment variables are loaded before any other components start. + +```yaml +# config.yaml — loadEnv must come first +loadEnv: + files: '.env' + +rest: true + +myApp: + files: './src/*.js' +``` + +Because Harper is a single-process application, environment variables are loaded onto `process.env` and are shared across all components. As long as `loadEnv` is listed before dependent components, those components will have access to the loaded variables. 
+ +## Override Behavior + +By default, `loadEnv` follows the standard dotenv convention: **existing environment variables take precedence** over values in `.env` files. This means variables already set in the shell or container environment will not be overwritten. + +To override existing environment variables, use the `override` option: + +```yaml +loadEnv: + files: '.env' + override: true +``` + +## Multiple Files + +As a Harper plugin, `loadEnv` supports multiple files using either glob patterns or a list of files in the configuration: + +```yaml +loadEnv: + files: + - '.env' + - '.env.local' +``` + +or + +```yaml +loadEnv: + files: 'env-vars/*' +``` + +Files are loaded in the order specified. + +## Related + +- [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Extensions overview') +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration section, including environment variable configuration conventions and HARPER_DEFAULT_CONFIG / HARPER_SET_CONFIG') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 7b60e0b1..f896b209 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -33,6 +33,19 @@ } ] }, + { + "type": "category", + "label": "Environment Variables", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "environment-variables/overview", + "label": "Overview" + } + ] + }, { "type": "category", "label": "GraphQL Querying", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 5a117cc6..9c55190d 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -65,10 +65,19 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - Current `reference/configuration.md` - 
`versioned_docs/version-4.1/configuration.md` (baseline) - **Status**: Not Started +- **Notes**: Must include a dedicated section on environment variable configuration. Content researched and ready from the environment-variables migration: + - **Naming convention**: YAML keys map to `SCREAMING_SNAKE_CASE` env vars (e.g. `http.port` → `HTTP_PORT`, `operationsApi.network.port` → `OPERATIONSAPI_NETWORK_PORT`). Case-insensitive. Component configuration cannot be set this way. + - **`HDB_CONFIG`**: CLI/ENV variable to specify a custom config file path at install time. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 51-55. + - **`HARPER_DEFAULT_CONFIG`**: Added in v4.7.2. Sets default config values as JSON, respects user edits, restores original on key removal. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 80-116 + `release_notes/4.7.2.md`. + - **`HARPER_SET_CONFIG`**: Added in v4.7.2. Forces config values that always win, even over user edits. Deleted (not restored) on key removal. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 118-145 + `release_notes/4.7.2.md`. + - **Configuration precedence**: `HARPER_SET_CONFIG` > user manual edits > `HARPER_DEFAULT_CONFIG` > file defaults. + - **State tracking**: Harper maintains `{rootPath}/backup/.harper-config-state.json` for drift detection and restoration. + - Full content is in `reference_versioned_docs/version-v4/environment-variables/configuration.md` — this file should be deleted after porting its content here. 
- **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Major config changes (http section, componentRoot) - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Configuration improvements - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Developer/production mode + - [4.7.2](release-notes/v4-tucker/4.7.2.md) - HARPER_SET_CONFIG and HARPER_DEFAULT_CONFIG added ### `reference/configuration/options.md` @@ -402,15 +411,11 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: New content about `loadEnv` plugin - **Additional Sources**: Built-in extensions docs, configuration docs - **Version Annotations**: loadEnv added in v4.5.0 -- **Status**: Not Started +- **Status**: In Progress +- **Notes**: Covers `loadEnv` extension only. Harper-level environment variable configuration (naming conventions, `HDB_CONFIG`, `HARPER_DEFAULT_CONFIG`, `HARPER_SET_CONFIG`) belongs in the Configuration section — see notes there. - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component -### `reference/environment-variables/configuration.md` - -- **Primary Source**: Extract from configuration docs or components docs -- **Status**: Not Started - --- ## Static Files Section From fa4d2f38db2c6668dc336700375f2528ee36477b Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 26 Feb 2026 11:31:56 -0700 Subject: [PATCH 14/51] HTTP section migration (#446) * docs: migrate HTTP section to v4 consolidated reference Adds reference_versioned_docs/version-v4/http/ with four pages: - overview.md: architecture, threading model, protocols served - configuration.md: full http section config reference - tls.md: top-level tls section reference (shared by HTTP and MQTT) - api.md: server global API (server.http, Request/Response, server.ws, etc.) Also adds migration-context/link-placeholders/http-link-placeholders.md and updates the v4 sidebar and migration map status. Co-Authored-By: Claude Sonnet 4.6 * fixup! 
docs: migrate HTTP section to v4 consolidated reference --------- Co-authored-by: Claude Sonnet 4.6 --- .../http-link-placeholders.md | 57 +++ .../version-v4/http/api.md | 401 ++++++++++++++++++ .../version-v4/http/configuration.md | 342 +++++++++++++++ .../version-v4/http/overview.md | 64 +++ .../version-v4/http/tls.md | 119 ++++++ .../version-v4-sidebars.json | 28 ++ v4-docs-migration-map.md | 6 +- 7 files changed, 1014 insertions(+), 3 deletions(-) create mode 100644 migration-context/link-placeholders/http-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/http/api.md create mode 100644 reference_versioned_docs/version-v4/http/configuration.md create mode 100644 reference_versioned_docs/version-v4/http/overview.md create mode 100644 reference_versioned_docs/version-v4/http/tls.md diff --git a/migration-context/link-placeholders/http-link-placeholders.md b/migration-context/link-placeholders/http-link-placeholders.md new file mode 100644 index 00000000..b114a76f --- /dev/null +++ b/migration-context/link-placeholders/http-link-placeholders.md @@ -0,0 +1,57 @@ +# Link Placeholders for HTTP Section + +## reference_versioned_docs/version-v4/http/tls.md + +- Line (intro): `[Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md)` + - Context: Noting that operationsApi.tls overrides the root tls section + - Target should be: Configuration section operations.md page + +- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Certificate management, mTLS, and other security topics + - Target should be: Security section overview page + +## reference_versioned_docs/version-v4/http/overview.md + +- Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Directing readers to certificate management details + - Target should be: Security section overview page + +- Line (Related section): 
`[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` + - Context: Related reference for REST protocol + - Target should be: REST section overview page + +- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Related reference for security/TLS/mTLS + - Target should be: Security section overview page + +## reference_versioned_docs/version-v4/http/configuration.md + +- Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Directing readers to full certificate management details + - Target should be: Security section overview page + +- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Related reference for TLS/mTLS configuration + - Target should be: Security section overview page + +- Line (Related section): `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Full configuration reference + - Target should be: Configuration section overview page + +## reference_versioned_docs/version-v4/http/api.md + +- Line (server.operation): `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Link to operations API overview + - Target should be: Operations API overview page + +- Line (server.recordAnalytics): `[analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` + - Context: Link to analytics reference + - Target should be: Analytics overview page + +- Line (Related section): `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` + - Context: Related reference for REST protocol + - Target should be: REST section overview page + +- Line (Related section): `[Global APIs](TODO:reference_versioned_docs/version-v4/resources/global-apis.md)` + - Context: Full global API reference including tables, databases, Resource, 
logger, auth + - Target should be: Resources global-apis page diff --git a/reference_versioned_docs/version-v4/http/api.md b/reference_versioned_docs/version-v4/http/api.md new file mode 100644 index 00000000..97431214 --- /dev/null +++ b/reference_versioned_docs/version-v4/http/api.md @@ -0,0 +1,401 @@ +--- +id: api +title: HTTP API +--- + + + + +The `server` global object is available in all Harper component code. It provides access to the HTTP server middleware chain, WebSocket server, authentication, resource registry, and cluster information. + +## `server.http(listener, options)` + +Add a handler to the HTTP request middleware chain. + +```ts +server.http(listener: RequestListener, options?: HttpOptions): HttpServer[] +``` + +Returns an array of `HttpServer` instances based on the `options.port` and `options.securePort` values. + +**Example:** + +```js +server.http( + (request, next) => { + if (request.url === '/graphql') return handleGraphQLRequest(request); + return next(request); + }, + { runFirst: true } +); +``` + +### `RequestListener` + +```ts +type RequestListener = (request: Request, next: RequestListener) => Promise; +``` + +To continue the middleware chain, call `next(request)`. To short-circuit, return a `Response` (or `Response`-like object) directly. + +### `HttpOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | --------------------------------------------- | +| `runFirst` | boolean | `false` | Insert this handler at the front of the chain | +| `port` | number | `http.port` | Target the HTTP server on this port | +| `securePort` | number | `http.securePort` | Target the HTTPS server on this port | + +### `HttpServer` + +A Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. 
+ +--- + +## `Request` + +A `Request` object is passed to HTTP middleware handlers and direct static REST handlers. It follows the [WHATWG `Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) API with additional Harper-specific properties. + +### Properties + +| Property | Type | Description | +| ---------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `url` | string | The request target (path + query string), e.g. `/path?query=string` | +| `method` | string | HTTP method: `GET`, `POST`, `PUT`, `DELETE`, etc. | +| `headers` | [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) | Request headers | +| `pathname` | string | Path portion of the URL, without query string | +| `protocol` | string | `http` or `https` | +| `data` | any | Deserialized body, based on `Content-Type` header | +| `ip` | string | Remote IP address of the client (or last proxy) | +| `host` | string | Host from the request headers | +| `session` | object | Current cookie-based session (a `Table` record instance). Update with `request.session.update({ key: value })`. A cookie is set automatically the first time a session is updated or a login occurs. | + +### Methods + +#### `request.login(username, password)` + +```ts +login(username: string, password: string): Promise +``` + +Authenticates the user by username and password. On success, creates a session and sets a cookie on the response. Rejects if authentication fails. + +#### `request.sendEarlyHints(link, headers?)` + +```ts +sendEarlyHints(link: string, headers?: object): void +``` + +Sends an [Early Hints](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/103) (HTTP 103) response before the final response. 
Useful in cache resolution functions to hint at preloadable resources: + +```javascript +class Origin { + async get(request) { + this.getContext().requestContext.sendEarlyHints(''); + return fetch(request); + } +} +Cache.sourcedFrom(Origin); +``` + +### Low-Level Node.js Access + +:::caution +These properties expose the raw Node.js request/response objects and should be used with caution. Using them can break other middleware handlers that depend on the layered `Request`/`Response` pattern. +::: + +| Property | Description | +| --------------- | ----------------------------------------------------------------------------------------------------- | +| `_nodeRequest` | Underlying [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) | +| `_nodeResponse` | Underlying [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) | + +--- + +## `Response` + +REST method handlers can return: + +- **Data directly** — Serialized using Harper's content negotiation +- **A `Response` object** — The WHATWG [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) +- **A `Response`-like object** — A plain object with the following properties: + +| Property | Type | Description | +| --------- | --------------------------------------------------------------------- | ------------------------------------------------- | +| `status` | number | HTTP status code (e.g. `200`, `404`) | +| `headers` | [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) | Response headers | +| `data` | any | Response data, serialized via content negotiation | +| `body` | Buffer \| string \| ReadableStream \| Blob | Raw response body (alternative to `data`) | + +--- + +## `server.ws(listener, options)` + +Add a handler to the WebSocket connection middleware chain. 
+ +```ts +server.ws(listener: WsListener, options?: WsOptions): HttpServer[] +``` + +**Example:** + +```js +server.ws((ws, request, chainCompletion) => { + chainCompletion.then(() => { + ws.on('message', (data) => console.log('received:', data)); + ws.send('hello'); + }); +}); +``` + +### `WsListener` + +```ts +type WsListener = (ws: WebSocket, request: Request, chainCompletion: Promise, next: WsListener) => Promise; +``` + +| Parameter | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------- | +| `ws` | [`WebSocket`](https://github.com/websockets/ws/blob/main/doc/ws.md#class-websocket) instance | +| `request` | Harper `Request` object from the upgrade event | +| `chainCompletion` | `Promise` that resolves when the HTTP request chain finishes. Await before sending to ensure the HTTP request is handled. | +| `next` | Continue chain: `next(ws, request, chainCompletion)` | + +### `WsOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | ----------------------------------------------- | +| `maxPayload` | number | 100 MB | Maximum WebSocket payload size | +| `runFirst` | boolean | `false` | Insert this handler at the front of the chain | +| `port` | number | `http.port` | Target the WebSocket server on this port | +| `securePort` | number | `http.securePort` | Target the secure WebSocket server on this port | + +--- + +## `server.upgrade(listener, options)` + +Add a handler to the HTTP server `upgrade` event. Use this to delegate upgrade events to an external WebSocket server. 
+ +```ts +server.upgrade(listener: UpgradeListener, options?: UpgradeOptions): void +``` + +**Example** (from the Harper Next.js component): + +```js +server.upgrade( + (request, socket, head, next) => { + if (request.url === '/_next/webpack-hmr') { + return upgradeHandler(request, socket, head).then(() => { + request.__harperdb_request_upgraded = true; + next(request, socket, head); + }); + } + return next(request, socket, head); + }, + { runFirst: true } +); +``` + +When `server.ws()` is registered, Harper adds a default upgrade handler. The default handler sets `request.__harperdb_request_upgraded = true` after upgrading, and checks for this flag before upgrading again (so external upgrade handlers can detect whether Harper has already handled the upgrade). + +### `UpgradeListener` + +```ts +type UpgradeListener = (request: IncomingMessage, socket: Socket, head: Buffer, next: UpgradeListener) => void; +``` + +### `UpgradeOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | ------------------------------------ | +| `runFirst` | boolean | `false` | Insert at the front of the chain | +| `port` | number | `http.port` | Target the HTTP server on this port | +| `securePort` | number | `http.securePort` | Target the HTTPS server on this port | + +--- + +## `server.socket(listener, options)` + +Create a raw TCP or TLS socket server. + +```ts +server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer +``` + +Only one socket server is created per call. A `securePort` takes precedence over `port`. + +### `ConnectionListener` + +Node.js connection listener as in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener). 
+ +### `SocketOptions` + +| Property | Type | Description | +| ------------ | ------ | -------------------------------------------------------------------------- | +| `port` | number | Port for a [`net.Server`](https://nodejs.org/api/net.html#class-netserver) | +| `securePort` | number | Port for a [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) | + +### `SocketServer` + +A Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. + +--- + +## `server.authenticateUser(username, password)` + +Added in: v4.5.0 + +```ts +server.authenticateUser(username: string, password: string): Promise +``` + +Returns the user object for the given username after verifying the password. Throws if the password is incorrect. + +Use this when you need to explicitly verify a user's credentials (e.g., in a custom login endpoint). For lookup without password verification, use [`server.getUser()`](#servergetuserusername). + +--- + +## `server.getUser(username)` + +```ts +server.getUser(username: string): Promise +``` + +Returns the user object for the given username without verifying credentials. Use for authorization checks when the user is already authenticated. + +--- + +## `server.resources` + +The central registry of all resources exported for REST, MQTT, and other protocols. 
+ +### `server.resources.set(name, resource, exportTypes?)` + +Register a resource: + +```js +class NewResource extends Resource {} +server.resources.set('NewResource', NewResource); + +// Limit to specific protocols: +server.resources.set('NewResource', NewResource, { rest: true, mqtt: false }); +``` + +### `server.resources.getMatch(path, exportType?)` + +Find a resource matching a path: + +```js +server.resources.getMatch('/NewResource/some-id'); +server.resources.getMatch('/NewResource/some-id', 'rest'); +``` + +--- + +## `server.operation(operation, context?, authorize?)` + +Execute an [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') operation programmatically. + +```ts +server.operation(operation: object, context?: { username: string }, authorize?: boolean): Promise +``` + +| Parameter | Type | Description | +| ----------- | ---------------------- | ---------------------------------------------------- | +| `operation` | object | Operations API request body | +| `context` | `{ username: string }` | Optional: execute as this user | +| `authorize` | boolean | Whether to apply authorization. Defaults to `false`. | + +--- + +## `server.recordAnalytics(value, metric, path?, method?, type?)` + +Record a metric into Harper's analytics system. + +```ts +server.recordAnalytics(value: number, metric: string, path?: string, method?: string, type?: string): void +``` + +| Parameter | Description | +| --------- | ---------------------------------------------------------------------------- | +| `value` | Numeric value (e.g. duration in ms, bytes) | +| `metric` | Metric name | +| `path` | Optional URL path for grouping (omit per-record IDs — use the resource name) | +| `method` | Optional HTTP method for grouping | +| `type` | Optional type for grouping | + +Metrics are aggregated and available via the [analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md 'Analytics overview'). 
+ +--- + +## `server.config` + +The parsed `harperdb-config.yaml` configuration object. Read-only access to Harper's current runtime configuration. + +--- + +## `server.nodes` + +Returns an array of node objects registered in the cluster. + +## `server.shards` + +Returns a map of shard number to an array of associated nodes. + +## `server.hostname` + +Returns the hostname of the current node. + +## `server.contentTypes` + +Returns the `Map` of registered content type handlers. Same as the global [`contentTypes`](#contenttypes) object. + +--- + +## `contentTypes` + +A [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for HTTP request/response serialization. Harper uses content negotiation: the `Content-Type` header selects the deserializer for incoming requests, and the `Accept` header selects the serializer for responses. + +### Built-in Content Types + +| MIME type | Description | +| --------------------- | ------------------ | +| `application/json` | JSON | +| `application/cbor` | CBOR | +| `application/msgpack` | MessagePack | +| `text/csv` | CSV | +| `text/event-stream` | Server-Sent Events | + +### Custom Content Type Handlers + +Register or replace a handler by setting it on the `contentTypes` map: + +```js +import { contentTypes } from 'harperdb'; + +contentTypes.set('text/xml', { + serialize(data) { + return '' + serialize(data) + ''; + }, + q: 0.8, // quality: lower = less preferred during content negotiation +}); +``` + +### Handler Interface + +| Property | Type | Description | +| --------------------------- | ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `serialize(data)` | `(any) => Buffer \| Uint8Array \| string` | Serialize data for a response | +| `serializeStream(data)` | `(any) => ReadableStream` | Serialize as a stream (for async iterables or 
large data) | +| `deserialize(buffer)` | `(Buffer \| string) => any` | Deserialize an incoming request body. Used when `deserializeStream` is absent. String for `text/*` types, Buffer for binary types. | +| `deserializeStream(stream)` | `(ReadableStream) => any` | Deserialize an incoming request stream | +| `q` | number (0–1) | Quality indicator for content negotiation. Defaults to `1`. | + +--- + +## Related + +- [HTTP Overview](./overview) +- [HTTP Configuration](./configuration) +- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') +- [Global APIs](TODO:reference_versioned_docs/version-v4/resources/global-apis.md 'All global APIs including tables, databases, Resource, logger, auth') diff --git a/reference_versioned_docs/version-v4/http/configuration.md b/reference_versioned_docs/version-v4/http/configuration.md new file mode 100644 index 00000000..3a6a7413 --- /dev/null +++ b/reference_versioned_docs/version-v4/http/configuration.md @@ -0,0 +1,342 @@ +--- +id: configuration +title: HTTP Configuration +--- + + + + + + +The `http` section in `harperdb-config.yaml` controls the built-in HTTP server that serves REST, WebSocket, component, and Operations API traffic. + +Harper must be restarted for configuration changes to take effect. + +## Ports + +### `http.port` + +Type: `integer` + +Default: `9926` + +The port the HTTP server listens on. This is the primary port for REST, WebSocket, MQTT-over-WebSocket, and component traffic. + +### `http.securePort` + +Type: `integer` + +Default: `null` + +The port for HTTPS connections. Requires a valid `tls` section configured with certificate and key. When set, Harper accepts both plaintext (`http.port`) and TLS connections (`http.securePort`) simultaneously. + +## TLS + +TLS is configured in its own top-level `tls` section in `harperdb-config.yaml`, separate from the `http` section. 
It is shared by the HTTP server (HTTPS), the MQTT broker (secure MQTT), and any TLS socket servers. See [TLS Configuration](./tls) for all options including multi-domain (SNI) certificates and the Operations API override. + +To enable HTTPS, set `http.securePort` and add a `tls` block: + +```yaml +http: + securePort: 9927 + +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## HTTP/2 + +### `http.http2` + +Added in: v4.5.0 + +Type: `boolean` + +Default: `false` + +Enables HTTP/2 for all API endpoints. HTTP/2 requires TLS, so `http.securePort` must also be set. + +```yaml +http: + http2: true + securePort: 9927 +``` + +## Timeouts and Limits + +### `http.headersTimeout` + +Type: `integer` + +Default: `60000` (ms) + +Maximum time in milliseconds the server waits to receive the complete HTTP headers for a request. + +### `http.keepAliveTimeout` + +Type: `integer` + +Default: `30000` (ms) + +Milliseconds of inactivity after which the server closes an idle keep-alive connection. + +### `http.timeout` + +Type: `integer` + +Default: `120000` (ms) + +Maximum time in milliseconds before a request times out. + +### `http.maxHeaderSize` + +Type: `integer` + +Default: `16394` (bytes) + +Maximum allowed size of HTTP request headers. + +### `http.requestQueueLimit` + +Type: `integer` + +Default: `20000` (ms) + +The maximum estimated request queue time in milliseconds. When the queue exceeds this limit, requests are rejected with HTTP 503. + +## Compression + +### `http.compressionThreshold` + +Added in: v4.2.0 + +Type: `number` + +Default: `1200` (bytes) + +For clients that support Brotli encoding (`Accept-Encoding: br`), responses larger than this threshold are compressed. Streaming query responses are always compressed for supporting clients, regardless of this setting (since their size is unknown upfront). 
+ +```yaml +http: + compressionThreshold: 1200 +``` + +## CORS + +### `http.cors` + +Type: `boolean` + +Default: `true` + +Enables Cross-Origin Resource Sharing, allowing requests from different origins. + +### `http.corsAccessList` + +Type: `string[]` + +Default: `null` + +An array of allowed origin domains when CORS is enabled. When `null`, all origins are allowed. + +```yaml +http: + cors: true + corsAccessList: + - https://example.com + - https://app.example.com +``` + +### `http.corsAccessControlAllowHeaders` + +Added in: v4.5.0 + +Type: `string` + +Default: `"Accept, Content-Type, Authorization"` + +Comma-separated list of headers allowed in the [`Access-Control-Allow-Headers`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) response header for OPTIONS (preflight) requests. + +## Session Affinity + +### `http.sessionAffinity` + +Added in: v4.1.0 + +Type: `string` + +Default: `null` + +Routes repeated requests from the same client to the same worker thread. This can improve caching locality and provide fairness in request handling. + +Accepted values: + +- `ip` — Route by the remote IP address. Use this when Harper is the public-facing server and each client has a distinct IP. +- `` — Route by the value of any HTTP header (e.g., `Authorization`). Use this when Harper is behind a proxy where all requests share the same source IP. + +```yaml +http: + sessionAffinity: ip +``` + +:::caution +If Harper is behind a reverse proxy and you use `ip`, all requests will share the proxy's IP and will be routed to a single thread. Use a header-based value instead. +::: + +## mTLS + +### `http.mtls` + +Added in: v4.3.0 + +Type: `boolean | object` + +Default: `false` + +Enables mutual TLS (mTLS) authentication for HTTP connections. When set to `true`, client certificates are verified against the CA specified in `tls.certificateAuthority`. 
Authenticated connections use the `CN` (common name) from the certificate subject as the Harper username. + +```yaml +http: + mtls: true +``` + +For granular control, specify an object: + +| Property | Type | Default | Description | +| ------------------------- | ----------------- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `user` | string | (CN from cert) | Authenticate all mTLS connections as this specific user. Set to `null` to skip credential-based authentication (requires combining with `required: true`). | +| `required` | boolean | `false` | Reject any connection that does not provide a valid client certificate. | +| `certificateVerification` | boolean \| object | `false` | Enable CRL/OCSP certificate revocation checking. See below. | + +### `http.mtls.certificateVerification` + +Added in: v4.7.0 (OCSP support) + +Type: `boolean | object` + +Default: `false` + +When mTLS is enabled, Harper can verify the revocation status of client certificates using CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol). Disabled by default; must be explicitly enabled for environments that require certificate revocation checking. + +Set to `true` to enable with all defaults, or configure as an object: + +**Global:** + +- `failureMode` — `'fail-closed'` (default) | `'fail-open'`. Whether to reject or allow connections when revocation checking fails. 
+ +**CRL** (enabled by default when `certificateVerification` is enabled): + +- `crl.enabled` — boolean, default `true` +- `crl.timeout` — ms to wait for CRL download, default `10000` +- `crl.cacheTtl` — ms to cache CRL, default `86400000` (24h) +- `crl.gracePeriod` — ms grace period after CRL `nextUpdate`, default `86400000` (24h) +- `crl.failureMode` — CRL-specific failure mode + +**OCSP** (enabled by default as CRL fallback): + +- `ocsp.enabled` — boolean, default `true` +- `ocsp.timeout` — ms to wait for OCSP response, default `5000` +- `ocsp.cacheTtl` — ms to cache successful responses, default `3600000` (1h) +- `ocsp.errorCacheTtl` — ms to cache errors, default `300000` (5m) +- `ocsp.failureMode` — OCSP-specific failure mode + +Harper uses a CRL-first strategy with OCSP fallback. If both fail, the configured `failureMode` is applied. + +**Examples:** + +```yaml +# Basic mTLS, no revocation checking +http: + mtls: true + +# mTLS with revocation checking (recommended for production) +http: + mtls: + certificateVerification: true + +# Require mTLS for all connections + revocation checking +http: + mtls: + required: true + certificateVerification: true + +# Custom verification settings +http: + mtls: + certificateVerification: + failureMode: fail-closed + crl: + timeout: 15000 + cacheTtl: 43200000 + ocsp: + timeout: 8000 + cacheTtl: 7200000 +``` + +## Logging + +HTTP request logging is disabled by default. Enabling the `http.logging` block turns on request logging. 
+ +### `http.logging` + +Added in: v4.6.0 + +Type: `object` + +Default: disabled + +```yaml +http: + logging: + level: info # info = all requests, warn = 4xx+, error = 5xx + path: ~/hdb/log/http.log + timing: true # log request timing + headers: false # log request headers (verbose) + id: true # assign and log a unique request ID +``` + +The `level` controls which requests are logged: + +- `info` (or more verbose) — All HTTP requests +- `warn` — Requests with status 400 or above +- `error` — Requests with status 500 or above + +## Complete Example + +```yaml +http: + port: 9926 + securePort: 9927 + http2: true + cors: true + corsAccessList: + - null + compressionThreshold: 1200 + headersTimeout: 60000 + keepAliveTimeout: 30000 + timeout: 120000 + maxHeaderSize: 16384 + requestQueueLimit: 20000 + sessionAffinity: null + mtls: false + logging: + level: warn + path: ~/hdb/log/http.log + timing: true + +# tls is a top-level section — see TLS Configuration +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## Related + +- [HTTP Overview](./overview) +- [HTTP API](./api) +- [TLS Configuration](./tls) +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview, including TLS and mTLS') +- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') diff --git a/reference_versioned_docs/version-v4/http/overview.md b/reference_versioned_docs/version-v4/http/overview.md new file mode 100644 index 00000000..5d9a2365 --- /dev/null +++ b/reference_versioned_docs/version-v4/http/overview.md @@ -0,0 +1,64 @@ +--- +id: overview +title: HTTP Server +--- + + + + + + +Harper includes a built-in HTTP server that serves as the primary interface for REST, WebSocket, MQTT-over-WebSocket, and component-defined endpoints. 
The same server handles all application traffic on a configurable port (default `9926`). + +## Architecture + +Harper's HTTP server is multi-threaded. Each thread runs an independent copy of the HTTP stack, and incoming connections are distributed across threads using `SO_REUSEPORT` socket sharing — the most performant mechanism available for multi-threaded socket handling. + +Added in: v4.1.0 (worker threads for HTTP requests) + +Changed in: v4.2.0 (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) + +In previous versions: Session-affinity based socket delegation was used to route requests. This has been deprecated in favor of `SO_REUSEPORT`. + +## Request Handling + +Harper uses a layered middleware chain for HTTP request processing. Components and applications can add handlers to this chain using the [`server.http()`](./api#serverhttp) API. Handlers are called in order; each handler can either process the request and return a `Response`, or pass it along to the next handler with `next(request)`. + +Request and response objects follow the [WHATWG Fetch API](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) conventions (`Request` and `Response` classes), providing good composability for layered middleware and clean mapping to REST resource handlers. + +## Protocols Served + +The HTTP server handles multiple protocols on the same port: + +- **REST** — CRUD operations on Harper resources via standard HTTP methods +- **WebSockets** — Real-time bidirectional connections (via `server.ws()`) +- **MQTT over WebSocket** — MQTT clients connecting over WebSocket (sub-protocol `mqtt`) +- **Server-Sent Events** — Streaming updates to browser clients +- **Operations API** — Management API (configurable to share or use separate port) + +## TLS / HTTPS + +HTTPS support is enabled by setting `http.securePort` in `harperdb-config.yaml` and configuring the `tls` section with a certificate and private key. 
The same `tls` configuration is shared by HTTPS and MQTT secure connections. + +See [Configuration](./configuration) for TLS options and [Security](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview') for certificate management details. + +## HTTP/2 + +Added in: v4.5.0 + +HTTP/2 can be enabled with the `http2: true` option in `harperdb-config.yaml`. When enabled, HTTP/2 applies to all API endpoints served on `http.securePort` (HTTP/2 requires TLS). + +## Compression + +Harper automatically compresses HTTP responses using Brotli for clients that advertise `Accept-Encoding: br`. Compression applies when the response body exceeds the configured `compressionThreshold` (default 1200 bytes). Streaming query responses are always compressed for clients that support it (since their size is not known upfront). + +## Logging + +HTTP request logging is not enabled by default. To enable it, add an `http.logging` block to your configuration. See [Configuration](./configuration#logging) for details. + +## Related + +- [HTTP Configuration](./configuration) +- [HTTP API](./api) +- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, TLS, mTLS, and authentication overview') diff --git a/reference_versioned_docs/version-v4/http/tls.md b/reference_versioned_docs/version-v4/http/tls.md new file mode 100644 index 00000000..9d7508d9 --- /dev/null +++ b/reference_versioned_docs/version-v4/http/tls.md @@ -0,0 +1,119 @@ +--- +id: tls +title: TLS Configuration +--- + + + + +Harper uses a top-level `tls` section in `harperdb-config.yaml` to configure Transport Layer Security. This configuration is shared by the HTTP server (HTTPS), the MQTT broker (secure MQTT), and any TLS socket servers created via the [HTTP API](./api#serversocketlistener-options). 
+ +The `operationsApi` section can optionally define its own `tls` block, which overrides the root `tls` for Operations API traffic only. See the [Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md 'Operations API configuration reference') for more details. + +Harper must be restarted for TLS configuration changes to take effect. + +## TLS Configuration + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +### `tls.certificate` + +Type: `string` + +Default: `"/keys/certificate.pem"` + +Path to the PEM-encoded certificate file. + +### `tls.certificateAuthority` + +Type: `string` + +Default: `"/keys/ca.pem"` + +Path to the PEM-encoded certificate authority (CA) file. Used to verify client certificates when mTLS is enabled. + +### `tls.privateKey` + +Type: `string` + +Default: `"/keys/privateKey.pem"` + +Path to the PEM-encoded private key file. + +### `tls.host` + +Type: `string | undefined` + +The domain name this certificate entry applies to, used for SNI (Server Name Indication) matching. Only relevant when `tls` is defined as an array. When omitted, the certificate's common name (CN) is used as the host name. + +### `tls.ciphers` + +Type: `string | undefined` + +Default: `crypto.defaultCipherList` + +Colon-separated list of allowed TLS cipher suites. When omitted, Node.js [default ciphers](https://nodejs.org/api/crypto.html#nodejs-crypto-constants) are used. See Node.js [Modifying the default TLS cipher suite](https://nodejs.org/api/tls.html#modifying-the-default-tls-cipher-suite) for more information. 
+ +## Enabling HTTPS + +To enable HTTPS, set `http.securePort` in addition to the `tls` section: + +```yaml +http: + securePort: 9927 + +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +When `http.securePort` is set, Harper accepts plaintext connections on `http.port` and TLS connections on `http.securePort` simultaneously. + +## Multi-Domain Certificates (SNI) + +To serve different certificates for different domains using Server Name Indication (SNI), define `tls` as an array of configuration objects. Each entry can optionally include a `host` property specifying which domain it applies to. If `host` is omitted, the certificate's common name and subject alternative names (SANs) are used. + +```yaml +tls: + - certificate: ~/hdb/keys/certificate1.pem + certificateAuthority: ~/hdb/keys/ca1.pem + privateKey: ~/hdb/keys/privateKey1.pem + host: example.com + - certificate: ~/hdb/keys/certificate2.pem + certificateAuthority: ~/hdb/keys/ca2.pem + privateKey: ~/hdb/keys/privateKey2.pem + # host omitted: certificate's CN is used +``` + +## Operations API Override + +The `operationsApi` section can define its own `tls` block to use a separate certificate for the Operations API: + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem + +operationsApi: + network: + securePort: 9924 + tls: + certificate: ~/hdb/keys/ops-certificate.pem + certificateAuthority: ~/hdb/keys/ops-ca.pem + privateKey: ~/hdb/keys/ops-privateKey.pem +``` + +See the [Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md 'Operations API configuration reference') for more details. 
+ +## Related + +- [HTTP Configuration](./configuration) — `http.securePort`, `http.http2`, `http.mtls` +- [HTTP Overview](./overview) +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Certificate management, mTLS, and other security topics') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index f896b209..2f272dcd 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -46,6 +46,34 @@ } ] }, + { + "type": "category", + "label": "HTTP", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "http/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "http/configuration", + "label": "Configuration" + }, + { + "type": "doc", + "id": "http/api", + "label": "API" + }, + { + "type": "doc", + "id": "http/tls", + "label": "TLS" + } + ] + }, { "type": "category", "label": "GraphQL Querying", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 9c55190d..ac2e237c 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -439,7 +439,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: New content about HTTP server - **Additional Sources**: Configuration docs, architecture docs -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Worker threads for HTTP requests - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Socket management (SO_REUSEPORT), flexible port configs @@ -449,7 +449,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `reference/configuration.md` (http section) - **Version Annotations**: - HTTP/2 support: v4.5.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - 
[4.5.0](release-notes/v4-tucker/4.5.0.md) - HTTP/2 support @@ -459,7 +459,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Current `reference/globals.md` - **Version Annotations**: - server.authenticateUser: v4.5.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - server.authenticateUser API From 2d5d2939003f612ff3f773c14b68fd0a5b217fc6 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 2 Mar 2026 14:50:35 -0700 Subject: [PATCH 15/51] Static Files Migration (#445) * docs: migrate Static Files section to v4 consolidated reference - Add reference_versioned_docs/version-v4/static-files/overview.md covering the static built-in plugin: basic usage, files/urlPath, v4.7 options (index, extensions, fallthrough, notFound), auto-update behavior, and examples including SPA client-side routing pattern - Add Static Files category to version-v4 sidebar - Add migration-context/link-placeholders/static-files-link-placeholders.md - Update v4-docs-migration-map.md status to In Progress with source notes Co-Authored-By: Claude Sonnet 4.6 * fixup! 
docs: migrate Static Files section to v4 consolidated reference --------- Co-authored-by: Claude Sonnet 4.6 --- .../static-files-link-placeholders.md | 15 ++ .../version-v4/static-files/overview.md | 174 ++++++++++++++++++ .../version-v4-sidebars.json | 13 ++ v4-docs-migration-map.md | 17 +- 4 files changed, 211 insertions(+), 8 deletions(-) create mode 100644 migration-context/link-placeholders/static-files-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/static-files/overview.md diff --git a/migration-context/link-placeholders/static-files-link-placeholders.md b/migration-context/link-placeholders/static-files-link-placeholders.md new file mode 100644 index 00000000..9bbb31bf --- /dev/null +++ b/migration-context/link-placeholders/static-files-link-placeholders.md @@ -0,0 +1,15 @@ +# Link Placeholders for Static Files + +## reference_versioned_docs/version-v4/static-files/overview.md + +- Line 41: `[Plugin](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview')` + - Context: Describing that `static` is a Plugin supporting standard `files`/`urlPath` config options + - Target should be: Components overview page (covering the Plugin concept and standard options) + +- Line 46: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — files and urlPath options')` + - Context: Directing the reader to full `files` glob pattern and `urlPath` documentation + - Target should be: Components overview page + +- Line 114: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — including files/urlPath options')` + - Context: "Related" section + - Target should be: Main components/plugins reference page diff --git a/reference_versioned_docs/version-v4/static-files/overview.md b/reference_versioned_docs/version-v4/static-files/overview.md 
new file mode 100644 index 00000000..053a8f5d --- /dev/null +++ b/reference_versioned_docs/version-v4/static-files/overview.md @@ -0,0 +1,174 @@ +--- +id: overview +title: Static Files +--- + + + + + + + +# Static Files + +- Added in: v4.5.0 +- Changed in: v4.7.0 - (Migrated to Plugin API and new options added) + +The `static` built-in plugin serves static files from your Harper application over HTTP. Use it to host websites, SPAs, downloadable assets, or any static content alongside your Harper data and API endpoints. + +`static` does **not** need to be installed — it is built into Harper and only needs to be declared in your `config.yaml`. + +## Basic Usage + +Configure `static` with the `files` option pointing to the files you want to serve: + +```yaml +static: + files: 'site/**' +``` + +Given a component with this structure: + +``` +my-app/ +├─ site/ +│ ├─ index.html +│ ├─ about.html +│ ├─ blog/ +│ ├─ post-1.html +│ ├─ post-2.html +├─ config.yaml +``` + +Files are accessed relative to the matched directory root, so `GET /index.html` returns `site/index.html` and `GET /blog/post-1.html` returns `site/blog/post-1.html`. + +## `files` and `urlPath` Options + +Added in: v4.5 + +`static` is a [Plugin](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview') and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. + +Use `urlPath` to mount the files at a specific URL prefix: + +```yaml +static: + files: 'site/**' + urlPath: 'app' +``` + +Now `GET /app/index.html` returns `site/index.html` and `GET /app/blog/post-1.html` returns `site/blog/post-1.html`. + +See [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — files and urlPath options') for full `files` glob pattern and `urlPath` documentation. 
+ +## Additional Options + +Added in: v4.7 + +In addition to the standard `files`, `urlPath`, and `timeout` options, `static` supports these configuration options: + +- **`index`** - `boolean` - _optional_ - If `true`, automatically serves `index.html` when a request targets a directory. Defaults to `false`. + +- **`extensions`** - `string[]` - _optional_ - File extensions to try when an exact path match is not found. For example, `extensions: ['html']` means a request for `/page-1` will also try `/page-1.html`. + +- **`fallthrough`** - `boolean` - _optional_ - If `true`, passes the request to the next handler when the requested file is not found. Set to `false` when using `notFound` to customize 404 responses. Defaults to `true`. + +- **`notFound`** - `string | { file: string; statusCode: number }` - _optional_ - A custom file (or file + status code) to return when a path is not found. Useful for serving a custom 404 page or for SPAs that use client-side routing. + +## Auto-Updates + +Added in: v4.7.0 + +Because `static` uses the Plugin API, it automatically responds to changes without requiring a Harper restart. Adding, removing, or modifying files — or updating `config.yaml` — takes effect immediately. + +## Examples + +### Basic static file serving + +Serve all files in the `static/` directory. Requests must match file names exactly. 
+ +```yaml +static: + files: 'static/**' +``` + +### Automatic `index.html` serving + +Serve `index.html` automatically when a request targets a directory: + +```yaml +static: + files: 'static/**' + index: true +``` + +With this structure: + +``` +my-app/ +├─ static/ +│ ├─ index.html +│ ├─ blog/ +│ ├─ index.html +│ ├─ post-1.html +``` + +Request mappings: + +``` +GET / -> static/index.html +GET /blog -> static/blog/index.html +GET /blog/post-1.html -> static/blog/post-1.html +``` + +### Automatic extension matching + +Combine `index` and `extensions` for clean URLs without file extensions: + +```yaml +static: + files: 'static/**' + index: true + extensions: ['html'] +``` + +Request mappings with the same structure: + +``` +GET / -> static/index.html +GET /blog -> static/blog/index.html +GET /blog/post-1 -> static/blog/post-1.html +``` + +### Custom 404 page + +Return a specific file when a requested path is not found: + +```yaml +static: + files: 'static/**' + notFound: 'static/404.html' + fallthrough: false +``` + +A request to `/non-existent` returns the contents of `static/404.html` with a `404` status code. + +> **Note:** When using `notFound`, set `fallthrough: false` so the request does not pass through to another handler before the custom 404 response is returned. + +### SPA client-side routing + +For SPAs that handle routing in the browser, return the main application file for any unmatched path: + +```yaml +static: + files: 'static/**' + fallthrough: false + notFound: + file: 'static/index.html' + statusCode: 200 +``` + +A request to any unmatched path returns `static/index.html` with a `200` status code, allowing the client-side router to handle navigation. 
+ +## Related + +- [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — including files/urlPath options') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 2f272dcd..70545900 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -46,6 +46,19 @@ } ] }, + { + "type": "category", + "label": "Static Files", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "static-files/overview", + "label": "Overview" + } + ] + }, { "type": "category", "label": "HTTP", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index ac2e237c..75a7d67a 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -422,14 +422,15 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ### `reference/static-files/overview.md` -- **Primary Source**: Extract from built-in plugins/extensions documentation -- **Additional Sources**: Current `reference/components/built-in-extensions.md` -- **Status**: Not Started - -### `reference/static-files/configuration.md` - -- **Primary Source**: Extract from configuration docs -- **Status**: Not Started +- **Primary Source**: `versioned_docs/version-4.7/reference/components/built-in-extensions.md` (static section) +- **Additional Sources**: + - `versioned_docs/version-4.6/reference/components/built-in-extensions.md` (pre-v4.7 behavior) + - `versioned_docs/version-4.5/developers/components/built-in.md` (early v4 behavior) +- **Status**: In Progress +- **Notes**: No separate `configuration.md` needed — all static plugin options are documented inline in the overview. The `configuration.md` entry has been removed; static file serving has no Harper-level configuration. 
The v4.7 Plugin API (`extensions`, `fallthrough`, `index`, `notFound` options and auto-update behavior) is annotated as added in v4.7.0 (inferred from version comparison; not present in v4.6 docs). The `static` plugin itself predates v4.7 (present in v4.4 and earlier). +- **Release Notes**: + - [4.7.2](release-notes/v4-tucker/4.7.2.md) - `static` handler defaults to `index.html` + - [4.7.3](release-notes/v4-tucker/4.7.3.md) - Fix trailing slash issue with static component --- From 5271417cb87021a21f078584b95d729e2d37aad9 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Wed, 4 Mar 2026 10:48:47 -0700 Subject: [PATCH 16/51] Logging Section Migration (#450) * docs: migrate Logging section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * fixup! docs: migrate Logging section to v4 consolidated reference * fixup! docs: migrate Logging section to v4 consolidated reference * fixup! docs: migrate Logging section to v4 consolidated reference * fixup! docs: migrate Logging section to v4 consolidated reference --------- Co-authored-by: Claude Sonnet 4.6 --- .../logging-link-placeholders.md | 43 ++ .../version-v4/logging/api.md | 153 ++++++++ .../version-v4/logging/configuration.md | 370 ++++++++++++++++++ .../version-v4/logging/operations.md | 91 +++++ .../version-v4/logging/overview.md | 92 +++++ .../version-v4-sidebars.json | 28 ++ v4-docs-migration-map.md | 8 +- 7 files changed, 781 insertions(+), 4 deletions(-) create mode 100644 migration-context/link-placeholders/logging-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/logging/api.md create mode 100644 reference_versioned_docs/version-v4/logging/configuration.md create mode 100644 reference_versioned_docs/version-v4/logging/operations.md create mode 100644 reference_versioned_docs/version-v4/logging/overview.md diff --git a/migration-context/link-placeholders/logging-link-placeholders.md b/migration-context/link-placeholders/logging-link-placeholders.md new file mode 100644 
index 00000000..212a0fc7 --- /dev/null +++ b/migration-context/link-placeholders/logging-link-placeholders.md @@ -0,0 +1,43 @@ +# Link Placeholders for Logging Section + +## reference_versioned_docs/version-v4/logging/overview.md + +- Line 14: `[Database](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Noting that audit logging and transaction logging are documented in the database section + - Target should be: Database transaction/audit logging page + +- Line 62: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Related section at bottom of file + - Target should be: Database transaction/audit logging page + +## reference_versioned_docs/version-v4/logging/configuration.md + +- Line 72: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: After describing logging.auditLog, pointing to where audit log details live + - Target should be: Database transaction/audit logging page + +- Line 133: `[HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md)` + - Context: Referencing HTTP logging config in the per-component section + - Target should be: HTTP configuration page (already exists: reference_versioned_docs/version-v4/http/configuration.md) + +- Line 192: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Related section at bottom of file + - Target should be: Database transaction/audit logging page + +- Line 193: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Related section — full config reference + - Target should be: Configuration section overview page + +## reference_versioned_docs/version-v4/logging/operations.md + +- Line 9: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Callout noting that 
audit/transaction log operations are documented in the database section + - Target should be: Database transaction/audit logging page + +- Line 76: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Related section at bottom of file + - Target should be: Database transaction/audit logging page + +- Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Related section — operations API overview + - Target should be: Operations API section overview page diff --git a/reference_versioned_docs/version-v4/logging/api.md b/reference_versioned_docs/version-v4/logging/api.md new file mode 100644 index 00000000..afcda71e --- /dev/null +++ b/reference_versioned_docs/version-v4/logging/api.md @@ -0,0 +1,153 @@ +--- +id: api +title: Logging API +--- + + + + + +## `logger` + +The `logger` global is available in all JavaScript components without any imports. It writes structured log entries to the standard Harper log file (`hdb.log`) at the configured `logging.external` level and path. See [Logging Configuration](./configuration#logging-external) for per-component log configuration. + +The `logger` global is a `MainLogger`. Calling `logger.withTag(tag)` returns a `TaggedLogger` scoped to that tag. + +### `MainLogger` + +`MainLogger` always has all log-level methods defined. It also exposes `withTag()` to create a `TaggedLogger`. + +```typescript +interface MainLogger { + trace(...messages: any[]): void; + debug(...messages: any[]): void; + info(...messages: any[]): void; + warn(...messages: any[]): void; + error(...messages: any[]): void; + fatal(...messages: any[]): void; + notify(...messages: any[]): void; + withTag(tag: string): TaggedLogger; +} +``` + +Each method corresponds to a log level. Only entries at or above the configured `logging.level` (or `logging.external.level`) are written. See [Log Levels](./overview#log-levels) for the full hierarchy. 
+ +### `TaggedLogger` + +`TaggedLogger` is returned by `logger.withTag(tag)`. It prefixes every log entry with the given tag, making it easy to filter log output by component or context. + +Because `TaggedLogger` is bound to the configured log level at creation time, methods for levels that are currently disabled are `null`. Always use optional chaining (`?.`) when calling methods on a `TaggedLogger`. + +```typescript +interface TaggedLogger { + trace: ((...messages: any[]) => void) | null; + debug: ((...messages: any[]) => void) | null; + info: ((...messages: any[]) => void) | null; + warn: ((...messages: any[]) => void) | null; + error: ((...messages: any[]) => void) | null; + fatal: ((...messages: any[]) => void) | null; + notify: ((...messages: any[]) => void) | null; +} +``` + +`TaggedLogger` does not have a `withTag()` method. + +### Usage + +#### Basic logging with `logger` + +```javascript +export class MyResource extends Resource { + async get(id) { + logger.debug('Fetching record', { id }); + const record = await super.get(id); + if (!record) { + logger.warn('Record not found', { id }); + } + return record; + } + + async put(record) { + logger.info('Updating record', { id: record.id }); + try { + return await super.put(record); + } catch (err) { + logger.error('Failed to update record', err); + throw err; + } + } +} +``` + +#### Tagged logging with `withTag()` + +Create a tagged logger once per module or class and reuse it. Always use `?.` when calling methods since a given level may be `null` if it is below the configured log level. 
+ +```javascript +const log = logger.withTag('my-resource'); + +export class MyResource extends Resource { + async get(id) { + log.debug?.('Fetching record', { id }); + const record = await super.get(id); + if (!record) { + log.warn?.('Record not found', { id }); + } + return record; + } + + async put(record) { + log.info?.('Updating record', { id: record.id }); + try { + return await super.put(record); + } catch (err) { + log.error?.('Failed to update record', err); + throw err; + } + } +} +``` + +Tagged entries appear in the log with the tag included in the entry header: + +``` +2023-03-09T14:25:05.269Z [info] [my-resource]: Updating record +``` + +### Log Entry Format + +Entries written via `logger` appear in `hdb.log` with the standard format: + +``` + [] [/]: +``` + +Entries written via a `TaggedLogger` include the tag: + +``` + [] []: +``` + +For external components, the thread context is set automatically based on which worker thread executes the code. + + + +## Related + +- [Logging Overview](./overview) +- [Logging Configuration](./configuration) +- [Logging Operations](./operations) diff --git a/reference_versioned_docs/version-v4/logging/configuration.md b/reference_versioned_docs/version-v4/logging/configuration.md new file mode 100644 index 00000000..39d7911e --- /dev/null +++ b/reference_versioned_docs/version-v4/logging/configuration.md @@ -0,0 +1,370 @@ +--- +id: configuration +title: Logging Configuration +--- + + + + + + +The `logging` section in `harperdb-config.yaml` controls standard log output. Many logging settings are applied dynamically without a restart (added in v4.6.0). + +## Main Logger + +### `logging.level` + +Type: `string` + +Default: `warn` + +Controls the verbosity of logs. Levels from least to most severe: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, `notify`. Setting a level includes that level and all more-severe levels. 
+ +```yaml +logging: + level: warn +``` + +For example, `level: warn` results in `warn`, `error`, `fatal`, and `notify` logs. + +### `logging.path` + +Type: `string` + +Default: `/log/hdb.log` + +Full file path for the log file. + +```yaml +logging: + path: ~/hdb/log/hdb.log +``` + +### `logging.root` + +Type: `string` + +Default: `/log` + +Directory path where log files are written. Use `path` to specify the full filename; use `root` to specify only the directory (Harper determines the filename). + +```yaml +logging: + root: ~/hdb/log +``` + +### `logging.file` + +Type: `boolean` + +Default: `true` + +Whether to write logs to a file. Disable if you want to use only standard streams. + +```yaml +logging: + file: true +``` + +### `logging.stdStreams` + +Type: `boolean` + +Default: `false` + +Log to `stdout`/`stderr` in addition to (or instead of) the log file. + +When enabled, run Harper in the foreground (`harper`, not `harper start`). + +```yaml +logging: + stdStreams: true +``` + +### `logging.console` + +Type: `boolean` + +Default: `false` + +Controls whether `console.log` and other `console.*` calls (and anything writing to `process.stdout`/`process.stderr` from JS components) are captured to the log file. + +```yaml +logging: + console: true +``` + +### `logging.auditLog` + +Type: `boolean` + +Default: `false` + +Enables audit (table transaction) logging. When enabled, Harper records every insert, update, and delete to a corresponding audit table. Audit log data is accessed via the `read_audit_log` operation. + +See [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') for details on using audit logs. + +```yaml +logging: + auditLog: false +``` + +### `logging.auditRetention` + +Type: `string | number` + +Default: `3d` + +How long audit log entries are retained before automatic eviction. Accepts duration strings (e.g., `3d`, `12h`) or milliseconds. 
+ +```yaml +logging: + auditRetention: 3d +``` + +## Log Rotation + +Rotation provides systematic management of the `hdb.log` file — compressing, archiving, and replacing it on a schedule or size threshold. Rotation is triggered when either `interval` or `maxSize` is set. + +> `interval` and `maxSize` are approximations only. The log file may exceed these values slightly before rotation occurs. + +### `logging.rotation.enabled` + +Type: `boolean` + +Default: `true` + +Enables log rotation. Rotation only activates when `interval` or `maxSize` is also set. + +### `logging.rotation.compress` + +Type: `boolean` + +Default: `false` + +Compress rotated log files with gzip. + +### `logging.rotation.interval` + +Type: `string` + +Default: `null` + +Time between rotations. Accepted units: `D` (days), `H` (hours), `M` (minutes). Example: `1D`, `12H`. + +### `logging.rotation.maxSize` + +Type: `string` + +Default: `null` + +Maximum log file size before rotation. Accepted units: `K` (kilobytes), `M` (megabytes), `G` (gigabytes). Example: `100M`, `1G`. + +### `logging.rotation.path` + +Type: `string` + +Default: `/log` + +Directory for storing rotated log files. Rotated files are named: `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. + +```yaml +logging: + rotation: + enabled: true + compress: false + interval: 1D + maxSize: 100M + path: ~/hdb/log +``` + +## Authentication Logging + +### `logging.auditAuthEvents.logFailed` + +Added in: v4.2.0 + +Type: `boolean` + +Default: `false` + +Log all failed authentication attempts. + +Example log entry: + +``` +[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"} +``` + +### `logging.auditAuthEvents.logSuccessful` + +Added in: v4.2.0 + +Type: `boolean` + +Default: `false` + +Log all successful authentication events. 
+ +Example log entry: + +``` +[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"} +``` + +```yaml +logging: + auditAuthEvents: + logFailed: false + logSuccessful: false +``` + +## Per-Component Logging + +Added in: v4.6.0 + +Harper supports independent logging configurations for different components. Each component logger can have its own `path`, `root`, `level`, `tag`, and `stdStreams` settings. All components default to the main `logging` configuration unless overridden. + +### `logging.external` + +Logging configuration for all external components that use the [`logger` API](./api). + +```yaml +logging: + external: + level: warn + path: ~/hdb/log/apps.log +``` + +### `http.logging` + +HTTP request logging. Disabled by default — defining this section enables it. + +```yaml +http: + logging: + level: info # info = all requests, warn = 4xx+, error = 5xx + path: ~/hdb/log/http.log + timing: true # log request duration + headers: false # log request headers (verbose) + id: true # assign and log a unique request ID per request +``` + +See [HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md 'HTTP logging configuration') for full details. + +### `mqtt.logging` + +MQTT logging configuration. Accepts standard logging options. + +```yaml +mqtt: + logging: + level: warn + path: ~/hdb/log/mqtt.log + stdStreams: false +``` + +### `authentication.logging` + +Authentication subsystem logging. Accepts standard logging options. + +```yaml +authentication: + logging: + level: warn + path: ~/hdb/log/auth.log +``` + +### `replication.logging` + +Replication subsystem logging. Accepts standard logging options. + +```yaml +replication: + logging: + level: warn + path: ~/hdb/log/replication.log +``` + +### `tls.logging` + +TLS subsystem logging. Accepts standard logging options. 
+ +```yaml +tls: + logging: + level: warn + path: ~/hdb/log/tls.log +``` + +### `storage.logging` + +Database storage subsystem logging. Accepts standard logging options. + +```yaml +storage: + logging: + level: warn + path: ~/hdb/log/storage.log +``` + +### `analytics.logging` + +Analytics subsystem logging. Accepts standard logging options. + +```yaml +analytics: + logging: + level: warn + path: ~/hdb/log/analytics.log +``` + +## Clustering Log Level + +Clustering has a separate log level due to its verbosity. Configure with `clustering.logLevel`. + +Valid levels from least verbose: `error`, `warn`, `info`, `debug`, `trace`. + +```yaml +clustering: + logLevel: warn +``` + +## Complete Example + +```yaml +logging: + level: warn + path: ~/hdb/log/hdb.log + file: true + stdStreams: false + console: false + auditLog: false + auditRetention: 3d + rotation: + enabled: true + compress: false + interval: 1D + maxSize: 100M + path: ~/hdb/log + auditAuthEvents: + logFailed: false + logSuccessful: false + external: + level: warn + path: ~/hdb/log/apps.log + +http: + logging: + level: warn + path: ~/hdb/log/http.log + timing: true +``` + +## Related + +- [Logging Overview](./overview) +- [Logging API](./api) +- [Logging Operations](./operations) +- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') +- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') diff --git a/reference_versioned_docs/version-v4/logging/operations.md b/reference_versioned_docs/version-v4/logging/operations.md new file mode 100644 index 00000000..ab288b88 --- /dev/null +++ b/reference_versioned_docs/version-v4/logging/operations.md @@ -0,0 +1,91 @@ +--- +id: operations +title: Logging Operations +--- + + + + +Operations for reading the standard Harper log (`hdb.log`). All operations are restricted to `super_user` roles only. 
+ +> Audit log and transaction log operations (`read_audit_log`, `read_transaction_log`, `delete_audit_logs_before`, `delete_transaction_logs_before`) are documented in [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging operations'). + +--- + +## `read_log` + +Returns log entries from the primary Harper log (`hdb.log`) matching the provided criteria. + +_Restricted to super_user roles only._ + +### Parameters + +| Parameter | Required | Type | Description | +| ----------- | -------- | ------ | ------------------------------------------------------------------------------------------------------ | +| `operation` | Yes | string | Must be `"read_log"` | +| `start` | No | number | Result offset to start from. Default: `0` (first entry in `hdb.log`). | +| `limit` | No | number | Maximum number of entries to return. Default: `1000`. | +| `level` | No | string | Filter by log level. One of: `notify`, `error`, `warn`, `info`, `debug`, `trace`. Default: all levels. | +| `from` | No | string | Start of time window. Format: `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default: first entry in log. | +| `until` | No | string | End of time window. Format: `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default: last entry in log. | +| `order` | No | string | Sort order: `asc` or `desc` by timestamp. Default: maintains `hdb.log` order. | +| `filter` | No | string | A substring that must appear in each returned log line. 
| + +### Request + +```json +{ + "operation": "read_log", + "start": 0, + "limit": 1000, + "level": "error", + "from": "2021-01-25T22:05:27.464+0000", + "until": "2021-01-25T23:05:27.464+0000", + "order": "desc" +} +``` + +### Response + +```json +[ + { + "level": "notify", + "message": "Connected to cluster server.", + "timestamp": "2021-01-25T23:03:20.710Z", + "thread": "main/0", + "tags": [] + }, + { + "level": "warn", + "message": "Login failed", + "timestamp": "2021-01-25T22:24:45.113Z", + "thread": "http/9", + "tags": [] + }, + { + "level": "error", + "message": "unknown attribute 'name and breed'", + "timestamp": "2021-01-25T22:23:24.167Z", + "thread": "http/9", + "tags": [] + } +] +``` + +### Response Fields + +| Field | Type | Description | +| ----------- | ------ | ------------------------------------------------------------------------------------------------------- | +| `level` | string | Log level of the entry. | +| `message` | string | Log message. | +| `timestamp` | string | ISO 8601 timestamp when the event occurred. | +| `thread` | string | Thread name and ID (e.g., `main/0`, `http/3`). | +| `tags` | array | Additional context tags. Entries from components may include `custom-function` or other component tags. 
| + +## Related + +- [Logging Overview](./overview) +- [Logging Configuration](./configuration) +- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit log and transaction log operations') +- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') diff --git a/reference_versioned_docs/version-v4/logging/overview.md b/reference_versioned_docs/version-v4/logging/overview.md new file mode 100644 index 00000000..1bada4a3 --- /dev/null +++ b/reference_versioned_docs/version-v4/logging/overview.md @@ -0,0 +1,92 @@ +--- +id: overview +title: Logging +--- + + + + + + +Harper's core logging system is used for diagnostics, monitoring, and observability. It has an extensive configuration system, and even supports feature-specific (per-component) configurations in latest versions. Furthermore, the `logger` global API is available for creating custom logs from any JavaScript application or plugin code. + +> If you are looking for information on Harper's Audit and Transaction logging system, refer to the [Database](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') section. + +## Log File + +Changed in: v4.1.0 — All logs consolidated into a single `hdb.log` file + +All standard log output is written to `<ROOTPATH>/log/hdb.log` (default: `~/hdb/log/hdb.log`). + +## Log Entry Format + +Each log entry follows this structure: + +``` +<timestamp> [<thread>/<id>] [<level>] ...[<tag>]: <message> +``` + +Example: + +``` +2023-03-09T14:25:05.269Z [main/0] [notify]: HarperDB successfully started. +``` + +Fields: + +| Field | Description | +| ----------- | ---------------------------------------------------------------------------------------------- | +| `timestamp` | ISO 8601 date/time when the event occurred. | +| `level` | Severity level. See [Log Levels](#log-levels) below. 
| +| `thread/id` | Name and ID of the thread that produced the log entry (generally, `main`, `http`, or `job`). | +| `tags` | Additional context tags (e.g., `custom-function`, `auth-event`). Most entries have no tags. | +| `message` | The log message. | + +### Log Levels + +From least to most severe (most verbose to least verbose): + +| Level | Description | +| -------- | --------------------------------------------------------------------------------------------- | +| `trace` | Highly detailed internal execution tracing. | +| `debug` | Diagnostic information useful during development. | +| `info` | General operational events. | +| `warn` | Potential issues that don't prevent normal operation. | +| `error` | Errors that affect specific operations. | +| `fatal` | Critical errors causing process termination. | +| `notify` | Important operational milestones (e.g., "server started"). Always logged regardless of level. | + +The default log level is `warn`. Setting a level includes that level and all more-severe levels. For example, `warn` logs `warn`, `error`, `fatal`, and `notify`. + +## Standard Streams + +Changed in: v4.6.0 + +By default, logs are written only to the log file. To also log to `stdout`/`stderr`, set [`logging.stdStreams: true`](./configuration.md#loggingstdstreams) (this is automatically enabled by the `DEFAULT_MODE=dev` configuration during installation). + +When logging to standard streams, run Harper in the foreground (i.e. `harper`, not `harper start`). + +As of v4.6.0, logging to standard streams does **not** include timestamps, and console logging (`console.log`, etc.) does not get forwarded to log files unless the [`logging.console: true`](./configuration.md#loggingconsole) option is enabled. 
+ +## Logger API + +JavaScript components can use the `logger` global to write structured log entries: + +```javascript +logger.trace('detailed trace message'); +logger.debug('debug info', { someContext: 'value' }); +logger.info('informational message'); +logger.warn('potential issue'); +logger.error('error occurred', error); +logger.fatal('fatal error'); +logger.notify('server is ready'); +``` + +The `logger` global provides `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` methods. The logger is based on the Node.js Console API. See [Logging API](./api) for full details. + +## Related + +- [Logging Configuration](./configuration) +- [Logging API](./api) +- [Logging Operations](./operations) +- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 70545900..3deba702 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -87,6 +87,34 @@ } ] }, + { + "type": "category", + "label": "Logging", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "logging/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "logging/configuration", + "label": "Configuration" + }, + { + "type": "doc", + "id": "logging/api", + "label": "API" + }, + { + "type": "doc", + "id": "logging/operations", + "label": "Operations" + } + ] + }, { "type": "category", "label": "GraphQL Querying", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 75a7d67a..d0b689ef 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -553,7 +553,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/administration/logging/index.md` - 
**Additional Sources**: Current `reference/logging.md` (if exists) -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Logging revamped, consolidated into hdb.log - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Major logging improvements @@ -564,21 +564,21 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Per-component logging: v4.6.0 - Granular configuration: v4.6.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Per-component logging, dynamic reloading, HTTP logging ### `reference/logging/api.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Logger based on Node.js Console API ### `reference/logging/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` -- **Status**: Not Started +- **Status**: In Progress - **Notes**: Operations for managing standard logs (not transaction/audit logs, which moved to database section) --- From 5fa176712840179889e16adb64e2cfe2c4deade7 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 10 Mar 2026 09:51:27 -0600 Subject: [PATCH 17/51] Analytics Section Migration (#451) * docs: migrate Analytics section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * fixup! 
docs: migrate Analytics section to v4 consolidated reference --------- Co-authored-by: Claude Sonnet 4.6 --- .../analytics-link-placeholders.md | 41 ++++ .../version-v4/analytics/operations.md | 136 ++++++++++++ .../version-v4/analytics/overview.md | 207 ++++++++++++++++++ .../version-v4-sidebars.json | 18 ++ v4-docs-migration-map.md | 4 +- 5 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 migration-context/link-placeholders/analytics-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/analytics/operations.md create mode 100644 reference_versioned_docs/version-v4/analytics/overview.md diff --git a/migration-context/link-placeholders/analytics-link-placeholders.md b/migration-context/link-placeholders/analytics-link-placeholders.md new file mode 100644 index 00000000..15a40ca0 --- /dev/null +++ b/migration-context/link-placeholders/analytics-link-placeholders.md @@ -0,0 +1,41 @@ +# Link Placeholders for Analytics Section + +## reference_versioned_docs/version-v4/analytics/overview.md + +- Line 99: `[server.recordAnalytics()](TODO:reference_versioned_docs/version-v4/http/api.md)` + - Context: Noting that applications can record custom metrics via this API + - Target should be: HTTP API page, `server.recordAnalytics` section (file already exists) + +- Line 103: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)` + - Context: Related section — custom metrics API + - Target should be: HTTP API page (file already exists) + +- Line 105: `[analytics.logging](TODO:reference_versioned_docs/version-v4/logging/configuration.md)` + - Context: Per-component analytics logging configuration + - Target should be: Logging configuration page (already migrated in PR #450) + +- Line 106: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: analytics.aggregatePeriod configuration + - Target should be: Configuration section overview page + +- Line 110: `[HTTP 
API](TODO:reference_versioned_docs/version-v4/http/api.md)` + - Context: Related section at bottom of file + - Target should be: HTTP API page (file already exists) + +- Line 111: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)` + - Context: Related section at bottom of file + - Target should be: Logging configuration page (already migrated in PR #450) + +- Line 112: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Related section at bottom of file + - Target should be: Configuration section overview page + +## reference_versioned_docs/version-v4/analytics/operations.md + +- Line 56: `[search_by_conditions](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` + - Context: Note that `conditions` parameter uses the same format as search_by_conditions + - Target should be: Operations API operations page + +- Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Related section at bottom of file + - Target should be: Operations API section overview page diff --git a/reference_versioned_docs/version-v4/analytics/operations.md b/reference_versioned_docs/version-v4/analytics/operations.md new file mode 100644 index 00000000..c5449b04 --- /dev/null +++ b/reference_versioned_docs/version-v4/analytics/operations.md @@ -0,0 +1,136 @@ +--- +id: operations +title: Analytics Operations +--- + + + + + +Operations for querying Harper analytics data. All operations require `superuser` permission. + +Analytics data can also be queried directly via `search_by_conditions` on the `hdb_raw_analytics` and `hdb_analytics` tables in the `system` database — see [Analytics Overview](./overview) for details on the table structure. + +--- + +## `list_metrics` + +Returns the list of available metric names that can be queried with `get_analytics`. 
+ +### Parameters + +| Parameter | Required | Type | Description | +| -------------- | -------- | -------- | ------------------------------------------------------------------------ | +| `operation` | Yes | string | Must be `"list_metrics"` | +| `metric_types` | No | string[] | Filter by type: `"builtin"`, `"custom"`, or both. Default: `["builtin"]` | + +### Request + +```json +{ + "operation": "list_metrics", + "metric_types": ["custom", "builtin"] +} +``` + +### Response + +```json +["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] +``` + +--- + +## `describe_metric` + +Returns the structure and available attributes for a specific metric. + +### Parameters + +| Parameter | Required | Type | Description | +| ----------- | -------- | ------ | ------------------------------ | +| `operation` | Yes | string | Must be `"describe_metric"` | +| `metric` | Yes | string | Name of the metric to describe | + +### Request + +```json +{ + "operation": "describe_metric", + "metric": "resource-usage" +} +``` + +### Response + +```json +{ + "attributes": [ + { "name": "id", "type": "number" }, + { "name": "metric", "type": "string" }, + { "name": "userCPUTime", "type": "number" }, + { "name": "systemCPUTime", "type": "number" }, + { "name": "node", "type": "string" } + ] +} +``` + +--- + +## `get_analytics` + +Queries analytics data for a specific metric over a time range. 
+ +### Parameters + +| Parameter | Required | Type | Description | +| ---------------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `operation` | Yes | string | Must be `"get_analytics"` | +| `metric` | Yes | string | Metric name — use `list_metrics` to get valid values | +| `start_time` | No | number | Start of time range as Unix timestamp in milliseconds | +| `end_time` | No | number | End of time range as Unix timestamp in milliseconds | +| `get_attributes` | No | string[] | Attributes to include in each result. If omitted, all attributes are returned | +| `conditions` | No | object[] | Additional filter conditions. Same format as [`search_by_conditions`](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Operations API — search_by_conditions') | + +### Request + +```json +{ + "operation": "get_analytics", + "metric": "resource-usage", + "start_time": 1769198332754, + "end_time": 1769198532754, + "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], + "conditions": [ + { + "attribute": "node", + "operator": "equals", + "value": "node1.example.com" + } + ] +} +``` + +### Response + +```json +[ + { + "id": "12345", + "metric": "resource-usage", + "userCPUTime": 100, + "systemCPUTime": 50 + }, + { + "id": "67890", + "metric": "resource-usage", + "userCPUTime": 150, + "systemCPUTime": 75 + } +] +``` + +## Related + +- [Analytics Overview](./overview) +- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Full Operations API reference') diff --git a/reference_versioned_docs/version-v4/analytics/overview.md b/reference_versioned_docs/version-v4/analytics/overview.md new file mode 100644 index 00000000..e2d0d8e9 --- /dev/null +++ b/reference_versioned_docs/version-v4/analytics/overview.md @@ -0,0 +1,207 @@ +--- +id: overview 
+title: Analytics +--- + + + + + +Added in: v4.5.0 (resource and storage analytics) + +Harper collects real-time telemetry and statistics across all operations, URL endpoints, and messaging topics. This data can be used to monitor server health, understand traffic and usage patterns, identify resource-intensive queries, and inform scaling decisions. + +## Storage Tables + +Analytics data is stored in two system tables in the `system` database: + +| Table | Description | +| ------------------- | ------------------------------------------------------------------------------------------- | +| `hdb_raw_analytics` | Per-second raw entries recorded by each thread. One record per second per active thread. | +| `hdb_analytics` | Aggregate entries recorded once per minute, summarizing all per-second data across threads. | + +Both tables require `superuser` permission to query. + +## Raw Analytics (`hdb_raw_analytics`) + +Raw entries are recorded once per second (when there is activity) by each thread. Each record captures all activity in the last second along with system resource information. Records use the timestamp in milliseconds since epoch as the primary key. + +Query raw analytics using `search_by_conditions` on the `hdb_raw_analytics` table. 
The example below fetches 10 seconds of raw entries: + +```http +POST http://localhost:9925 +Content-Type: application/json + +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_raw_analytics", + "conditions": [{ + "search_attribute": "id", + "search_type": "between", + "search_value": [1688594000000, 1688594010000] + }] +} +``` + +Example raw entry: + +```json +{ + "time": 1688594390708, + "period": 1000.8336279988289, + "metrics": [ + { + "metric": "bytes-sent", + "path": "search_by_conditions", + "type": "operation", + "median": 202, + "mean": 202, + "p95": 202, + "p90": 202, + "count": 1 + }, + { + "metric": "memory", + "threadId": 2, + "rss": 1492664320, + "heapTotal": 124596224, + "heapUsed": 119563120, + "external": 3469790, + "arrayBuffers": 798721 + }, + { + "metric": "utilization", + "idle": 138227.52767700003, + "active": 70.5066209952347, + "utilization": 0.0005098165086230495 + } + ], + "threadId": 2, + "totalBytesProcessed": 12182820, + "id": 1688594390708.6853 +} +``` + +## Aggregate Analytics (`hdb_analytics`) + +Aggregate entries are recorded once per minute, combining per-second raw entries from all threads into a single summary record. Use `search_by_conditions` on the `hdb_analytics` table with a broader time range: + +```http +POST http://localhost:9925 +Content-Type: application/json + +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_analytics", + "conditions": [{ + "search_attribute": "id", + "search_type": "between", + "search_value": [1688194100000, 1688594990000] + }] +} +``` + +Example aggregate entry: + +```json +{ + "period": 60000, + "metric": "bytes-sent", + "method": "connack", + "type": "mqtt", + "median": 4, + "mean": 4, + "p95": 4, + "p90": 4, + "count": 1, + "id": 1688589569646, + "time": 1688589569646 +} +``` + +## Standard Metrics + +Harper automatically tracks the following metrics for all services. 
Applications can also define custom metrics via [`server.recordAnalytics()`](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP API — server.recordAnalytics'). + +### HTTP Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ------------------ | ------------- | -------------- | --------------------------- | ----- | ---------------------------------------- | +| `duration` | resource path | request method | `cache-hit` or `cache-miss` | ms | Duration of request handler | +| `duration` | route path | request method | `fastify-route` | ms | Duration of Fastify route handler | +| `duration` | operation | | `operation` | ms | Duration of Operations API operation | +| `success` | resource path | request method | | % | Percentage of successful requests | +| `success` | route path | request method | `fastify-route` | % | | +| `success` | operation | | `operation` | % | | +| `bytes-sent` | resource path | request method | | bytes | Response bytes sent | +| `bytes-sent` | route path | request method | `fastify-route` | bytes | | +| `bytes-sent` | operation | | `operation` | bytes | | +| `transfer` | resource path | request method | `operation` | ms | Duration of response transfer | +| `transfer` | route path | request method | `fastify-route` | ms | | +| `transfer` | operation | | `operation` | ms | | +| `socket-routed` | | | | % | Percentage of sockets immediately routed | +| `tls-handshake` | | | | ms | TLS handshake duration | +| `tls-reused` | | | | % | Percentage of TLS sessions reused | +| `cache-hit` | table name | | | % | Percentage of cache hits | +| `cache-resolution` | table name | | | ms | Duration of resolving uncached entries | + +### MQTT / WebSocket Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ------------------ | ------ | ------------ | ------ | ----- | ------------------------------------------------ | +| `mqtt-connections` | | | | count | Number of open direct MQTT connections | +| 
`ws-connections` | | | | count | Number of open WebSocket connections | +| `connection` | `mqtt` | `connect` | | % | Percentage of successful direct MQTT connections | +| `connection` | `mqtt` | `disconnect` | | % | Percentage of explicit direct MQTT disconnects | +| `connection` | `ws` | `connect` | | % | Percentage of successful WebSocket connections | +| `connection` | `ws` | `disconnect` | | % | Percentage of explicit WebSocket disconnects | +| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | Bytes sent for a given MQTT command and topic | + +### Replication Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------- | +| `bytes-sent` | node.database | `replication` | `egress` | bytes | Bytes sent for replication | +| `bytes-sent` | node.database | `replication` | `blob` | bytes | Bytes sent for blob replication | +| `bytes-received` | node.database | `replication` | `ingress` | bytes | Bytes received for replication | +| `bytes-received` | node.database | `replication` | `blob` | bytes | Bytes received for blob replication | + +### Resource Usage Metrics + +| `metric` | Key attributes | Other | Unit | Description | +| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | --------------------------------------------------------------------------------- | +| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | Database file size breakdown | +| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage: idle/active time, queue latency, and memory breakdown | +| `resource-usage` | (see below) | | various | Node.js process resource usage (see [resource-usage](#resource-usage-metric)) | +| 
`storage-volume` | `available`, `free`, `size` | `database` | bytes | Storage volume size breakdown | +| `table-size` | `size` | `database`, `table` | bytes | Table file size | +| `utilization` | | | % | Percentage of time the worker thread was processing requests | + +#### `resource-usage` Metric + +Includes everything returned by Node.js [`process.resourceUsage()`](https://nodejs.org/api/process.html#processresourceusage) (with `userCPUTime` and `systemCPUTime` converted to milliseconds), plus: + +| Field | Unit | Description | +| ---------------- | ---- | ------------------------------------------- | +| `time` | ms | Unix timestamp when the metric was recorded | +| `period` | ms | Duration of the measurement period | +| `cpuUtilization` | % | CPU utilization (user + system combined) | + +## Custom Metrics + +Applications can record custom metrics using the `server.recordAnalytics()` API. See [HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md 'server.recordAnalytics API') for details. + +## Analytics Configuration + +The `analytics.aggregatePeriod` configuration option controls how frequently aggregate summaries are written. See [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') for details. + +Per-component analytics logging can be configured via `analytics.logging`. See [Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md 'analytics.logging configuration'). 
+ +## Related + +- [Analytics Operations](./operations) +- [HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md 'server.recordAnalytics') +- [Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md 'analytics.logging') +- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 3deba702..a537f24e 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -115,6 +115,24 @@ } ] }, + { + "type": "category", + "label": "Analytics", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "analytics/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "analytics/operations", + "label": "Operations" + } + ] + }, { "type": "category", "label": "GraphQL Querying", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index d0b689ef..4d698c84 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -592,7 +592,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Resource analytics: v4.5.0 - Storage analytics: v4.5.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Resource and storage analytics - [4.7.0](release-notes/v4-tucker/4.7.0.md) - New analytics and licensing functionality @@ -600,7 +600,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ### `reference/analytics/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` -- **Status**: Not Started +- **Status**: In Progress --- From e46a359f2b0b6d9d9e08a80bfe84dadf19e80d95 Mon Sep 17 00:00:00 2001 From: Ethan 
Arrowood Date: Wed, 11 Mar 2026 09:08:13 -0600 Subject: [PATCH 18/51] MQTT Section Migration (#449) * docs: migrate MQTT section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * fixup! docs: migrate MQTT section to v4 consolidated reference * whoops - missed a space --------- Co-authored-by: Claude Sonnet 4.6 --- .../mqtt-link-placeholders.md | 41 ++++ .../version-v4/mqtt/configuration.md | 231 ++++++++++++++++++ .../version-v4/mqtt/overview.md | 142 +++++++++++ .../version-v4-sidebars.json | 18 ++ v4-docs-migration-map.md | 14 +- 5 files changed, 439 insertions(+), 7 deletions(-) create mode 100644 migration-context/link-placeholders/mqtt-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/mqtt/configuration.md create mode 100644 reference_versioned_docs/version-v4/mqtt/overview.md diff --git a/migration-context/link-placeholders/mqtt-link-placeholders.md b/migration-context/link-placeholders/mqtt-link-placeholders.md new file mode 100644 index 00000000..63a2050c --- /dev/null +++ b/migration-context/link-placeholders/mqtt-link-placeholders.md @@ -0,0 +1,41 @@ +# Link Placeholders for MQTT Section + +## reference_versioned_docs/version-v4/mqtt/overview.md + +- Line 28: `[schema.graphql](TODO:reference_versioned_docs/version-v4/database/schema.md)` + - Context: Explaining how to define a table that becomes an MQTT topic namespace + - Target should be: Schema definition reference page (database section) + +- Line 101: `[MQTT Configuration](TODO:reference_versioned_docs/version-v4/http/overview.md)` + - Context: "See HTTP Overview" in the Related section + - Target should be: HTTP overview (handles MQTT over WebSocket) + +- Line 103: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Related section — TLS and mTLS overview + - Target should be: Security section overview page + +- Line 104: `[Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md)` + - Context: 
Related section — defining tables/topics + - Target should be: Database schema reference page + +- Line 105: `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` + - Context: Related section — noting REST and MQTT share the same path conventions + - Target should be: REST section overview page + +## reference_versioned_docs/version-v4/mqtt/configuration.md + +- Line 20: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)` + - Context: Describing mqtt.network.securePort — links to TLS config + - Target should be: TLS configuration page (in http section) + +- Line 133: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)` + - Context: Related section link at bottom of file + - Target should be: TLS configuration page (in http section) + +- Line 134: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` + - Context: Related section link at bottom of file + - Target should be: Security section overview page + +- Line 135: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Related section — full harperdb-config.yaml reference + - Target should be: Configuration section overview page diff --git a/reference_versioned_docs/version-v4/mqtt/configuration.md b/reference_versioned_docs/version-v4/mqtt/configuration.md new file mode 100644 index 00000000..51d0afac --- /dev/null +++ b/reference_versioned_docs/version-v4/mqtt/configuration.md @@ -0,0 +1,231 @@ +--- +id: configuration +title: MQTT Configuration +--- + + + + + + + +The `mqtt` section in `harperdb-config.yaml` controls Harper's built-in MQTT broker. MQTT is enabled by default. + +Harper must be restarted for configuration changes to take effect. 
+ +## Minimal Example + +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + webSocket: true + requireAuthentication: true +``` + +## Ports + +### `mqtt.network.port` + +Type: `integer` + +Default: `1883` + +The port for plaintext (non-TLS) MQTT connections. + +### `mqtt.network.securePort` + +Type: `integer` + +Default: `8883` + +The port for secure MQTT connections (MQTTS). Uses the `tls` configuration for certificates. See [TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md 'TLS configuration shared by HTTP and MQTT') for certificate setup. + +## WebSocket + +### `mqtt.webSocket` + +Type: `boolean` + +Default: `true` + +Enables MQTT over WebSockets. When enabled, Harper handles WebSocket connections on the HTTP port (default `9926`) that specify the `mqtt` sub-protocol (`Sec-WebSocket-Protocol: mqtt`). This is required by the MQTT specification and should be set by any conformant MQTT-over-WebSocket client. + +```yaml +mqtt: + webSocket: true +``` + +## Authentication + +### `mqtt.requireAuthentication` + +Type: `boolean` + +Default: `true` + +Controls whether credentials are required to establish an MQTT connection. When `true`, clients must authenticate with either a username/password or a valid mTLS client certificate. + +When set to `false`, unauthenticated connections are allowed. Unauthenticated clients are still subject to authorization on each publish and subscribe operation — by default, tables and resources do not grant access to unauthenticated users, but this can be configured at the resource level. + +```yaml +mqtt: + requireAuthentication: true +``` + +## mTLS + +### `mqtt.network.mtls` + +Added in: v4.3.0 + +Type: `boolean | object` + +Default: `false` + +Enables mutual TLS (mTLS) authentication for MQTT connections. When set to `true`, client certificates are verified against the CA specified in the root `tls.certificateAuthority` section. 
Authenticated connections use the `CN` (common name) from the client certificate's subject as the Harper username by default. + +```yaml +mqtt: + network: + mtls: true +``` + +For granular control, specify an object with the following optional properties: + +### `mqtt.network.mtls.user` + +Type: `string | null` + +Default: Common Name from client certificate + +Specifies a fixed username to authenticate all mTLS connections as. When set, any connection that passes certificate verification authenticates as this user regardless of the certificate's CN. + +Setting to `null` disables credential-based authentication for mTLS connections. When combined with `required: true`, this enforces that clients must have a valid certificate AND provide separate credential-based authentication. + +### `mqtt.network.mtls.required` + +Type: `boolean` + +Default: `false` + +When `true`, all incoming MQTT connections must provide a valid client certificate. Connections without a valid certificate are rejected. By default, clients can authenticate with either mTLS or standard username/password credentials. + +### `mqtt.network.mtls.certificateAuthority` + +Type: `string` + +Default: Path from `tls.certificateAuthority` + +Path to the certificate authority (CA) file used to verify MQTT client certificates. By default, uses the CA configured in the root `tls` section. Set this if MQTT clients should be verified against a different CA than the one used for HTTP/TLS. + +### `mqtt.network.mtls.certificateVerification` + +Type: `boolean | object` + +Default: `true` + +When mTLS is enabled, Harper verifies the revocation status of client certificates using OCSP (Online Certificate Status Protocol). This ensures revoked certificates cannot be used for authentication. 
+ +Set to `false` to disable revocation checking, or configure as an object: + +| Property | Type | Default | Description | +| ------------- | ------- | ------------- | ------------------------------------------------------------------------------------------------------ | +| `timeout` | integer | `5000` | Maximum milliseconds to wait for an OCSP response. | +| `cacheTtl` | integer | `3600000` | Milliseconds to cache successful verification results (default 1h). | +| `failureMode` | string | `'fail-open'` | Behavior when OCSP verification fails: `'fail-open'` (allow, log warning) or `'fail-closed'` (reject). | + +```yaml +mqtt: + network: + mtls: + required: true + certificateVerification: + failureMode: fail-closed + timeout: 5000 + cacheTtl: 3600000 +``` + +## mTLS Examples + +```yaml +# Require client certificate + standard credentials (combined auth) +mqtt: + network: + mtls: + user: null + required: true + +# Authenticate all mTLS connections as a fixed user +mqtt: + network: + mtls: + user: mqtt-service-account + required: true + +# mTLS optional — clients can use mTLS or credentials +mqtt: + network: + mtls: true +``` + +## Logging + +### `mqtt.logging` + +Type: `object` + +Default: disabled + +Configures logging for MQTT activity. Accepts the standard logging configuration options. + +```yaml +mqtt: + logging: + path: ~/hdb/log/mqtt.log + level: warn + stdStreams: false +``` + +| Option | Description | +| ------------ | ------------------------------------------------- | +| `path` | File path for the MQTT log output. | +| `root` | Alternative to `path` — sets the log directory. | +| `level` | Log level: `error`, `warn`, `info`, `debug`, etc. | +| `tag` | Custom tag to prefix log entries. | +| `stdStreams` | When `true`, also logs to stdout/stderr. 
| + +## Complete Example + +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + mtls: + required: false + certificateAuthority: ~/hdb/keys/ca.pem + certificateVerification: + failureMode: fail-open + timeout: 5000 + cacheTtl: 3600000 + webSocket: true + requireAuthentication: true + logging: + level: warn + path: ~/hdb/log/mqtt.log + +# TLS is a top-level section, shared with HTTP +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## Related + +- [MQTT Overview](./overview) +- [TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md 'TLS configuration shared by MQTT and HTTP') +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, certificates, and mTLS overview') +- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') diff --git a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md new file mode 100644 index 00000000..553f4135 --- /dev/null +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -0,0 +1,142 @@ +--- +id: overview +title: MQTT +--- + + + + + + +Added in: v4.2.0 + +Harper includes a built-in MQTT broker that provides real-time pub/sub messaging deeply integrated with the database. Unlike a generic MQTT broker, Harper's MQTT implementation connects topics directly to database records — publishing to a topic writes to the database, and subscribing to a topic delivers live updates for the corresponding record. + +## How Topics Map to Database Records + +MQTT topics in Harper follow the same path convention as REST endpoints. If you define a table or resource with an endpoint path of `my-resource`, the corresponding MQTT topic namespace is `my-resource`. 
+ +A topic of `my-resource/some-id` corresponds to the record with id `some-id` in the `my-resource` table (or custom resource). This means: + +- **Subscribing** to `my-resource/some-id` delivers notification messages whenever that record is updated or deleted. +- The **current value** of the record is treated as the retained message for that topic. On subscription, the subscriber immediately receives the current record as the initial retained message — no separate GET request needed. +- **Publishing** with the `retain` flag set replaces the record in the database (equivalent to a PUT operation). +- **Publishing without** the `retain` flag delivers the message to current subscribers without writing to the database. + +Defining a table that creates a topic can be as simple as adding a table with no attributes to your [schema.graphql](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition for defining tables and topics') in a Harper application: + +```graphql +type MyTopic @table @export +``` + +## Protocol Support + +Harper supports MQTT versions **v3.1.1** and **v5**, with standard publish/subscribe capabilities. + +### Topics and Wildcards + +Harper supports multi-level topics for both publishing and subscribing: + +- **Multi-level wildcard (`#`)** — Subscribe to `my-resource/#` to receive notifications for all records in that resource, including nested paths (`my-resource/some-id`, `my-resource/nested/id`). +- **Single-level wildcard (`+`)** — Added in v4.3.0. Subscribe to `my-resource/+/status` to match any single path segment. + +### QoS Levels + +- **QoS 0** — At most once delivery (fire and forget). +- **QoS 1** — At least once delivery (acknowledged delivery). +- **QoS 2** — Harper can perform the QoS 2 conversation but does not guarantee exactly-once delivery. + +### Sessions + +- **Clean sessions** — Subscriptions and queued messages are discarded on disconnect. 
+- **Durable sessions** — Subscriptions and queued messages are persisted across reconnects. + +### Last Will + +Added in: v4.3.0 (inferred from version comparison, needs verification) + +Harper supports the MQTT Last Will and Testament feature. If a client disconnects unexpectedly, the broker publishes the configured will message on its behalf. + +## Content Negotiation + +Harper handles structured data natively. Messages can be published and received in any supported structured format — JSON, CBOR, or MessagePack — and Harper stores and delivers them as structured objects. Different clients can independently choose their preferred format: one client may publish in JSON while another subscribes and receives in CBOR. + +## Ordering and Distributed Delivery + +Harper is designed for distributed, low-latency message delivery. Messages are delivered to subscribers immediately on arrival — Harper does not delay delivery to coordinate consensus across nodes. + +In a distributed cluster, messages may arrive out of order due to network topology. The behavior depends on whether the message is retained or non-retained: + +- **Retained messages** (published with `retain: true`, or written via PUT/upsert) maintain eventual consistency across the cluster. Harper keeps the message with the latest timestamp as the winning record state. An out-of-order earlier message will not be re-delivered to clients; the cluster converges to the most recent state. +- **Non-retained messages** are always delivered to local subscribers when received, even if they arrive out of order. Every message is delivered, prioritizing completeness over strict ordering. + +**Non-retained messages** are suited for applications like chat where every message must be delivered. **Retained messages** are suited for sensor readings or state updates where only the latest value matters. 
+ +## Authentication + +MQTT connections support two authentication methods: + +- **Credential-based** — Standard MQTT username/password in the CONNECT packet. +- **mTLS** — Added in v4.3.0. Mutual TLS authentication using client certificates. The `CN` (common name) from the client certificate subject is used as the Harper username by default. + +Authentication is required by default (`requireAuthentication: true`). See [MQTT Configuration](./configuration) for details on disabling authentication or configuring mTLS options. + +## Server Events API + +JavaScript components can listen for MQTT connection events via `server.mqtt.events`: + +```javascript +server.mqtt.events.on('connected', (session, socket) => { + console.log('client connected with id', session.clientId); +}); +``` + +Available events: + +| Event | Description | +| -------------- | ---------------------------------------------------- | +| `connection` | Client establishes a TCP or WebSocket connection | +| `connected` | Client completes MQTT handshake and is authenticated | +| `auth-failed` | Client fails to authenticate | +| `disconnected` | Client disconnects | + +## Feature Support Matrix + +| Feature | Support | +| --------------------------------------------- | ------------------------------------------------------------ | +| MQTT v3.1.1 connections | ✅ | +| MQTT v5 connections | ✅ | +| Secure MQTTS (TLS) | ✅ | +| MQTT over WebSockets | ✅ | +| Authentication via username/password | ✅ | +| Authentication via mTLS | ✅ (added v4.3.0) | +| Publish | ✅ | +| Subscribe | ✅ | +| Multi-level wildcard (`#`) | ✅ | +| Single-level wildcard (`+`) | ✅ (added v4.3.0) | +| QoS 0 | ✅ | +| QoS 1 | ✅ | +| QoS 2 | Not fully supported — conversation supported, not guaranteed | +| Keep-Alive monitoring | ✅ | +| Clean session | ✅ | +| Durable session | ✅ | +| Distributed durable session | Not supported | +| Last Will | ✅ | +| MQTT V5 Subscribe retain handling | ✅ (added v4.3.0) | +| MQTT V5 User properties | Not 
supported | +| MQTT V5 Will properties | Not supported | +| MQTT V5 Connection properties | Not supported | +| MQTT V5 Connection acknowledgement properties | Not supported | +| MQTT V5 Publish properties | Not supported | +| MQTT V5 Subscribe properties (general) | Not supported | +| MQTT V5 Ack properties | Not supported | +| MQTT V5 AUTH command | Not supported | +| MQTT V5 Shared subscriptions | Not supported | + +## Related + +- [MQTT Configuration](./configuration) +- [HTTP Overview](TODO:reference_versioned_docs/version-v4/http/overview.md 'HTTP server overview — handles MQTT over WebSocket') +- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, TLS, and mTLS overview') +- [Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Defining tables and topics with schema.graphql') +- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface — same path conventions as MQTT topics') diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index a537f24e..62cbab66 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -133,6 +133,24 @@ } ] }, + { + "type": "category", + "label": "MQTT", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "mqtt/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "mqtt/configuration", + "label": "Configuration" + } + ] + }, { "type": "category", "label": "GraphQL Querying", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 4d698c84..2b13f37d 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -411,7 +411,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: New content about `loadEnv` plugin - **Additional Sources**: Built-in extensions 
docs, configuration docs - **Version Annotations**: loadEnv added in v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Notes**: Covers `loadEnv` extension only. Harper-level environment variable configuration (naming conventions, `HDB_CONFIG`, `HARPER_DEFAULT_CONFIG`, `HARPER_SET_CONFIG`) belongs in the Configuration section — see notes there. - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component @@ -426,7 +426,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: - `versioned_docs/version-4.6/reference/components/built-in-extensions.md` (pre-v4.7 behavior) - `versioned_docs/version-4.5/developers/components/built-in.md` (early v4 behavior) -- **Status**: In Progress +- **Status**: Complete - **Notes**: No separate `configuration.md` needed — all static plugin options are documented inline in the overview. The `configuration.md` entry has been removed; static file serving has no Harper-level configuration. The v4.7 Plugin API (`extensions`, `fallthrough`, `index`, `notFound` options and auto-update behavior) is annotated as added in v4.7.0 (inferred from version comparison; not present in v4.6 docs). The `static` plugin itself predates v4.7 (present in v4.4 and earlier). 
- **Release Notes**: - [4.7.2](release-notes/v4-tucker/4.7.2.md) - `static` handler defaults to `index.html` @@ -440,7 +440,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: New content about HTTP server - **Additional Sources**: Configuration docs, architecture docs -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Worker threads for HTTP requests - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Socket management (SO_REUSEPORT), flexible port configs @@ -450,7 +450,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from `reference/configuration.md` (http section) - **Version Annotations**: - HTTP/2 support: v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - HTTP/2 support @@ -460,7 +460,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Current `reference/globals.md` - **Version Annotations**: - server.authenticateUser: v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - server.authenticateUser API @@ -531,7 +531,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - mTLS support: v4.3.0 - Single-level wildcards: v4.3.0 - CRDT: v4.3.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - MQTT support introduced (QoS 0 and 1, durable sessions) - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS, single-level wildcards, retain handling, CRDT @@ -541,7 +541,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from configuration docs and real-time docs - **Version Annotations**: Port change v4.5.0 (9925 → 9933) -- 
**Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Default replication port change From 3758021962bc06ccd8e4ebaef5aea4cd4e7173a2 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 17 Mar 2026 08:00:32 -0600 Subject: [PATCH 19/51] Security and Users & Roles Sections Migration (#452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: migrate Security section to v4 consolidated reference Migrates all security documentation from versioned_docs into the new reference_versioned_docs/version-v4/security/ structure. Includes 9 pages covering authentication methods, certificate management, CORS, SSL/HTTPS configuration, and users/roles RBAC. Co-Authored-By: Claude Sonnet 4.6 * docs: move Users & Roles to its own top-level section Splits security/users-and-roles.md into a dedicated users-and-roles/ section with overview, configuration, and operations files. Updates the sidebar to reflect the new top-level "Users & Roles" category. 
Co-Authored-By: Claude Sonnet 4.6 * manual edits to security sections * docs: add security/configuration.md and fix authentication config keys - Add security/configuration.md covering the top-level `authentication:` config block (authorizeLocal, cacheTTL, enableSessions, token timeouts, hashFunction) with a TODO to verify the hashFunction key name - Fix jwt-authentication.md: config was incorrectly nested under `operationsApi.authentication` — it is a top-level `authentication:` section - Fix jwt-authentication.md: update broken ssl.md link to http/tls.md - Update users-and-roles/configuration.md: replace fabricated config snippet with a link to security/configuration.md#hashfunction Co-Authored-By: Claude Sonnet 4.6 * finish security and users-and-roles sections * docs: update plan to reflect users-and-roles promotion to top-level section Notes that users-and-roles was broken out from security/ during migration, and documents the new users-and-roles/ and security/configuration.md structure. 
Co-Authored-By: Claude Sonnet 4.6 * pr review fixes * switch to jsonwebtoken module reference * remove cluster_user --------- Co-authored-by: Claude Sonnet 4.6 --- .../security-link-placeholders.md | 29 ++ .../version-v4/http/tls.md | 2 +- .../security/basic-authentication.md | 58 +++ .../security/certificate-management.md | 152 ++++++ .../security/certificate-verification.md | 447 ++++++++++++++++++ .../version-v4/security/configuration.md | 71 +++ .../version-v4/security/jwt-authentication.md | 118 +++++ .../security/mtls-authentication.md | 80 ++++ .../version-v4/security/overview.md | 51 ++ .../users-and-roles/configuration.md | 67 +++ .../version-v4/users-and-roles/operations.md | 176 +++++++ .../version-v4/users-and-roles/overview.md | 253 ++++++++++ .../version-v4-sidebars.json | 66 +++ v4-docs-migration-map.md | 18 +- v4-docs-reference-plan.md | 15 +- 15 files changed, 1590 insertions(+), 13 deletions(-) create mode 100644 migration-context/link-placeholders/security-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/security/basic-authentication.md create mode 100644 reference_versioned_docs/version-v4/security/certificate-management.md create mode 100644 reference_versioned_docs/version-v4/security/certificate-verification.md create mode 100644 reference_versioned_docs/version-v4/security/configuration.md create mode 100644 reference_versioned_docs/version-v4/security/jwt-authentication.md create mode 100644 reference_versioned_docs/version-v4/security/mtls-authentication.md create mode 100644 reference_versioned_docs/version-v4/security/overview.md create mode 100644 reference_versioned_docs/version-v4/users-and-roles/configuration.md create mode 100644 reference_versioned_docs/version-v4/users-and-roles/operations.md create mode 100644 reference_versioned_docs/version-v4/users-and-roles/overview.md diff --git a/migration-context/link-placeholders/security-link-placeholders.md 
b/migration-context/link-placeholders/security-link-placeholders.md new file mode 100644 index 00000000..e5464816 --- /dev/null +++ b/migration-context/link-placeholders/security-link-placeholders.md @@ -0,0 +1,29 @@ +# Link Placeholders for Security Section + +## reference_versioned_docs/version-v4/security/mtls-authentication.md + +- Line 47: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` + - Context: Referring to replication mTLS configuration + - Target should be: Replication clustering page that covers mTLS for replication + +## reference_versioned_docs/version-v4/security/certificate-management.md + +- Line 8: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` + - Context: Note that this page covers external-facing APIs; replication certs are covered separately + - Target should be: Replication clustering page with certificate management section + +- ~~Line 105: `[TODO:reference_versioned_docs/version-v4/cli/commands.md]`~~ **RESOLVED** → `../cli/commands.md` + +## reference_versioned_docs/version-v4/security/certificate-verification.md + +- Line 190: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` + - Context: Replication mTLS configuration reference + - Target should be: Replication clustering page + +## reference_versioned_docs/version-v4/security/cors.md + +- ~~Line 36: `[TODO:reference_versioned_docs/version-v4/http/configuration.md]`~~ **RESOLVED** → `../http/configuration.md` + +## reference_versioned_docs/version-v4/security/ssl.md + +- ~~Line 56: `[TODO:reference_versioned_docs/version-v4/http/tls.md]`~~ **RESOLVED** → `../http/tls.md` diff --git a/reference_versioned_docs/version-v4/http/tls.md b/reference_versioned_docs/version-v4/http/tls.md index 9d7508d9..2fbaa4b3 100644 --- a/reference_versioned_docs/version-v4/http/tls.md +++ b/reference_versioned_docs/version-v4/http/tls.md @@ -116,4 +116,4 @@ See the [Operations API Configuration](TODO:reference_versioned_docs/version-v4/ - 
[HTTP Configuration](./configuration) — `http.securePort`, `http.http2`, `http.mtls` - [HTTP Overview](./overview) -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Certificate management, mTLS, and other security topics') +- [Security mTLS Authentication](../security/mtls-authentication.md) diff --git a/reference_versioned_docs/version-v4/security/basic-authentication.md b/reference_versioned_docs/version-v4/security/basic-authentication.md new file mode 100644 index 00000000..d2393fd8 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/basic-authentication.md @@ -0,0 +1,58 @@ +--- +id: basic-authentication +title: Basic Authentication +--- + + + +Available since: v4.1.0 + +Harper supports HTTP Basic Authentication. In the context of an HTTP transaction, [Basic Authentication](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication#basic_authentication_scheme) is the simplest authorization scheme, which transmits credentials as username/password pairs encoded using base64. Importantly, this scheme does not encrypt credentials. If used over an insecure connection, such as HTTP, they are susceptible to being compromised. Only ever use Basic Authentication over secured connections, such as HTTPS. Even then, it's better to upgrade to an encryption-based authentication scheme or certificates. See [SSL / HTTPS](./ssl.md) for more information. + +## How It Works + +Each request must contain the `Authorization` header with a value of `Basic <credentials>`, where `<credentials>` is the Base64 encoding of the string `username:password`. 
+ +``` +Authorization: Basic <credentials> +``` + +## Example + +The following example shows how to construct the Authorization header using `btoa()`: + +```javascript +const username = 'HDB_ADMIN'; +const password = 'abc123!'; +const authorizationValue = `Basic ${btoa(`${username}:${password}`)}`; +``` + +Then use the `authorizationValue` as the value for the `Authorization` header such as: + +```javascript +fetch('/', { + // ... + headers: { + Authorization: authorizationValue, + }, + // ... +}); +``` + +## cURL Example + +With cURL you can use the `--user` (`-u`) command-line option to automatically handle the Base64 encoding: + +```bash +curl -u "username:password" [URL] +``` + +## When to Use Basic Auth + +Basic authentication is the simplest option and is appropriate for: + +- Server-to-server requests in trusted environments +- Development and testing +- Scenarios where token management overhead is undesirable + +For user-facing applications or when tokens are preferred for performance reasons, see [JWT Authentication](./jwt-authentication.md). diff --git a/reference_versioned_docs/version-v4/security/certificate-management.md b/reference_versioned_docs/version-v4/security/certificate-management.md new file mode 100644 index 00000000..3f76e644 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/certificate-management.md @@ -0,0 +1,152 @@ +--- +id: certificate-management +title: Certificate Management +--- + + + + + +This page covers certificate management for Harper's external-facing HTTP and Operations APIs. For replication certificate management, see [Replication Certificate Management](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering and certificate management'). 
+ +## Default Behavior + +On first run, Harper automatically generates self-signed TLS certificates at `/keys/`: + +- `certificate.pem` — The server certificate +- `privateKey.pem` — The server private key +- `ca.pem` — A self-signed Certificate Authority + +These certificates have a valid Common Name (CN), but they are not signed by a root authority. HTTPS can be used with them, but clients must be configured to accept the invalid certificate. + +## Development Setup + +By default, HTTPS is disabled. HTTP is suitable for local development and trusted private networks. If you are developing on a remote server with requests traversing the Internet, enable HTTPS. + +To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` and restart Harper: + +```yaml +http: + securePort: 9926 +``` + +Harper will use the auto-generated certificates from `/keys/`. + +## Production Setup + +For production, use certificates from your own CA or a public CA, with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. + +### Option 1: Replace Harper Certificates + +Enable HTTPS and replace the certificate files: + +```yaml +http: + securePort: 9926 +tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +Either replace the files at `/keys/` in place, or update `tls.certificate` and `tls.privateKey` to point to your new files and restart Harper. + +The `operationsApi.tls` section is optional. If not set, Harper uses the values from the top-level `tls` section. You can specify different certificates for the Operations API: + +```yaml +operationsApi: + tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +### Option 2: Nginx Reverse Proxy + +Instead of enabling HTTPS directly on Harper, use Nginx as a reverse proxy. Configure Nginx to handle HTTPS with certificates from your own CA or a public CA, then forward HTTP requests to Harper. 
+ +This approach keeps Harper's HTTP interface internal while Nginx handles TLS termination. + +### Option 3: External Reverse Proxy / Load Balancer + +External services such as an AWS Elastic Load Balancer or Google Cloud Load Balancing can act as TLS-terminating reverse proxies. Configure the service to accept HTTPS connections and forward over a private network to Harper as HTTP. + +These services typically include integrated certificate management. + +## mTLS Setup + +Mutual TLS (mTLS) requires both client and server to present certificates. To enable mTLS, provide a CA certificate that Harper will use to verify client certificates: + +```yaml +http: + mtls: + required: true +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +For full mTLS authentication details, see [mTLS Authentication](./mtls-authentication.md). + +## Certificate Verification + +Added in: v4.5.0 (certificate revocation); v4.7.0 (OCSP support) + +When using mTLS, enable certificate verification to ensure revoked certificates cannot authenticate even if still within their validity period: + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +Harper supports two industry-standard methods: + +**CRL (Certificate Revocation List)** + +- Downloaded and cached locally (24 hours by default) +- Fast verification after first download (no network requests) +- Best for high-volume verification and offline scenarios + +**OCSP (Online Certificate Status Protocol)** + +- Real-time query to the CA's OCSP responder +- Best for certificates without CRL distribution points +- Responses cached (1 hour by default) + +**Harper's approach: CRL-first with OCSP fallback** + +1. Checks CRL if available (fast, cached locally) +2. Falls back to OCSP if CRL is unavailable or fails +3. Applies the configured failure mode if both methods fail + +For full configuration options and troubleshooting, see [Certificate Verification](./certificate-verification.md). 
+ +## Dynamic Certificate Management + +Added in: v4.4.0 (confirmed via release notes) + +Certificates — including CAs and private keys — can be dynamically managed without restarting Harper. + +## Multiple Certificate Authorities + +It is possible to use different certificates for the Operations API and the HTTP (custom application) API. For example, in scenarios where only your application endpoints need to be exposed to the Internet and the Operations API is reserved for administration, you may use a private CA for the Operations API and a public CA for your application certificates. + +Configure each separately: + +```yaml +# Top-level tls: used by HTTP/application endpoints +tls: + certificate: ~/hdb/keys/app-certificate.pem + privateKey: ~/hdb/keys/app-privateKey.pem + +# Operations API can use a separate cert +operationsApi: + tls: + certificate: ~/hdb/keys/ops-certificate.pem + privateKey: ~/hdb/keys/ops-privateKey.pem +``` + +## Renewing Certificates + +The `harper renew-certs` CLI command renews the auto-generated Harper certificates. See [CLI Commands](../cli/commands.md) for details. + +**Changes to TLS settings require a restart**, except where dynamic certificate management is used. diff --git a/reference_versioned_docs/version-v4/security/certificate-verification.md b/reference_versioned_docs/version-v4/security/certificate-verification.md new file mode 100644 index 00000000..8a6aa673 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/certificate-verification.md @@ -0,0 +1,447 @@ +--- +id: certificate-verification +title: Certificate Verification +--- + + + + + +Added in: v4.7.0 (OCSP support confirmed via release notes; certificate revocation support added in v4.5.0) + +Certificate verification (also called certificate revocation checking) ensures that revoked certificates cannot be used for mTLS authentication, even if they are otherwise valid and trusted. 
This is a critical security control for environments where certificates may need to be revoked before their expiration date — due to compromise, employee departure, or other security concerns. + +## Overview + +When a client presents a certificate for mTLS authentication, Harper performs two levels of checks: + +1. **Certificate Validation** (always performed by Node.js TLS): + - Certificate signature is valid + - Certificate is issued by a trusted CA + - Certificate is within its validity period + - Certificate chain is properly formed + +2. **Certificate Revocation Checking** (optional, must be explicitly enabled): + - Certificate has not been revoked by the issuing CA + - Uses CRL and/or OCSP + +Revocation checking is **disabled by default**. + +## Revocation Checking Methods + +### CRL (Certificate Revocation List) + +A CRL is a digitally signed list of revoked certificates published by a Certificate Authority. + +**Advantages:** + +- Fast verification (cached locally) +- Works offline once downloaded +- Predictable bandwidth usage +- Good for high-volume verification +- No privacy concerns (no per-certificate queries) + +**How it works:** + +1. Harper downloads the CRL from the distribution point specified in the certificate. +2. The CRL is cached locally (24 hours by default). +3. Subsequent verifications check the cached CRL — very fast, no network requests. +4. The CRL is refreshed in the background before expiration. + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + crl: + timeout: 10000 # 10 seconds to download CRL + cacheTtl: 86400000 # Cache for 24 hours + gracePeriod: 86400000 # 24 hour grace period after nextUpdate + failureMode: fail-closed # Reject on CRL check failure +``` + +### OCSP (Online Certificate Status Protocol) + +OCSP provides real-time certificate status checking by querying the CA's OCSP responder. 
+ +**Advantages:** + +- Real-time revocation status +- Smaller response size than CRL +- Good for certificates without CRL distribution points +- Works when CRL is unavailable + +**How it works:** + +1. Harper sends a request to the OCSP responder specified in the certificate. +2. The responder returns the current status: good, revoked, or unknown. +3. The response is cached (1 hour by default for success, 5 minutes for errors). + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + ocsp: + timeout: 5000 # 5 seconds for OCSP response + cacheTtl: 3600000 # Cache successful responses for 1 hour + errorCacheTtl: 300000 # Cache errors for 5 minutes + failureMode: fail-closed # Reject on OCSP check failure +``` + +## Verification Strategy + +Harper uses a **CRL-first strategy with OCSP fallback**: + +1. **Check CRL** if available (fast; uses cached CRL; no network request if cached). +2. **Fall back to OCSP** if the certificate has no CRL distribution point, the CRL download fails, or the CRL is expired and cannot be refreshed. +3. **Apply failure mode** if both methods fail. + +This provides the best balance of performance, reliability, and security. + +## Configuration + +### Enable with Defaults + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +This enables CRL checking (10s timeout, 24h cache), OCSP checking (5s timeout, 1h cache), and fail-closed mode. 
+ +### Custom Configuration + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed # Global setting + crl: + timeout: 15000 # 15 seconds for CRL download + cacheTtl: 43200000 # Cache CRLs for 12 hours + gracePeriod: 86400000 # 24 hour grace period + failureMode: fail-closed # CRL-specific setting + ocsp: + timeout: 8000 # 8 seconds for OCSP response + cacheTtl: 7200000 # Cache results for 2 hours + errorCacheTtl: 600000 # Cache errors for 10 minutes + failureMode: fail-closed # OCSP-specific setting +``` + +### CRL Only (No OCSP) + +```yaml +http: + mtls: + certificateVerification: + ocsp: false # Disable OCSP; CRL remains enabled +``` + +Only disable OCSP if all client certificates have CRL distribution points. Otherwise, certificates without CRL URLs won't be checked for revocation. + +### OCSP Only (No CRL) + +```yaml +http: + mtls: + certificateVerification: + crl: false # Disable CRL; OCSP remains enabled +``` + +### Environment Variables + +All settings can be configured via environment variables: + +```bash +# Enable certificate verification +HTTP_MTLS_CERTIFICATEVERIFICATION=true + +# Global failure mode +HTTP_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed + +# CRL settings +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL=true +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_CACHETTL=43200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_GRACEPERIOD=86400000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_FAILUREMODE=fail-closed + +# OCSP settings +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP=true +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_TIMEOUT=8000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_CACHETTL=7200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_ERRORCACHETTL=600000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_FAILUREMODE=fail-closed +``` + +For replication servers, use the `REPLICATION_` prefix instead of `HTTP_`. 
+ +## Failure Modes + +### fail-closed (Recommended) + +**Default behavior.** Rejects connections when verification fails due to network errors, timeouts, or other operational issues. + +Use when: + +- Security is paramount +- You can tolerate false positives (rejecting valid certificates due to CA unavailability) +- Your CA infrastructure is highly available +- You're in a zero-trust environment + +```yaml +certificateVerification: + failureMode: fail-closed +``` + +### fail-open + +Allows connections when verification fails, but logs a warning. The connection is still rejected if the certificate is explicitly found to be revoked. + +Use when: + +- Availability is more important than perfect security +- Your CA infrastructure may be intermittently unavailable +- You have other compensating controls +- You're gradually rolling out certificate verification + +```yaml +certificateVerification: + failureMode: fail-open +``` + +**Important:** Invalid signatures on CRLs always result in rejection regardless of failure mode, as this indicates potential tampering. + +## Performance Considerations + +### CRL Performance + +- **First verification**: Downloads CRL (10s timeout by default) +- **Subsequent verifications**: Instant (reads from cache) +- **Background refresh**: CRL is refreshed before expiration without blocking requests +- **Memory usage**: ~10–100KB per CRL depending on size +- **Network usage**: One download per CRL per `cacheTtl` period + +### OCSP Performance + +- **First verification**: OCSP query (5s timeout by default) +- **Subsequent verifications**: Reads from cache (1 hour default) +- **Memory usage**: Minimal (~1KB per cached response) +- **Network usage**: One query per unique certificate per `cacheTtl` period + +### Optimization Tips + +Increase CRL cache TTL for stable environments: + +```yaml + +... +crl: + cacheTtl: 172800000 # 48 hours +``` + +Increase OCSP cache TTL for long-lived connections: + +```yaml + +... 
+ocsp: + cacheTtl: 7200000 # 2 hours +``` + +Reduce grace period for tighter revocation enforcement: + +```yaml + +... +crl: + gracePeriod: 0 # No grace period +``` + +## Production Best Practices + +### High-Security Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed + crl: + timeout: 15000 + cacheTtl: 43200000 # 12 hours + gracePeriod: 0 # No grace period for strict enforcement + ocsp: + timeout: 8000 + cacheTtl: 3600000 # 1 hour +``` + +### High-Availability Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-open # Prioritize availability + crl: + timeout: 5000 + cacheTtl: 86400000 # 24 hours + gracePeriod: 86400000 # 24 hour grace period + ocsp: + timeout: 3000 + cacheTtl: 7200000 # 2 hours +``` + +### Performance-Critical Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + crl: + cacheTtl: 172800000 # 48 hours + gracePeriod: 86400000 + ocsp: + cacheTtl: 7200000 # 2 hours + errorCacheTtl: 600000 +``` + +## Troubleshooting + +### Connection Rejected: Certificate Verification Failed + +**Cause:** Certificate was found to be revoked, or verification failed in fail-closed mode. + +**Solutions:** + +1. Check if the certificate is actually revoked in the CRL or OCSP responder. +2. Verify CA infrastructure is accessible. +3. Check timeout settings — increase if needed. +4. Temporarily switch to fail-open mode while investigating. + +### High Latency on First Connection + +**Cause:** CRL is being downloaded for the first time. + +**Solutions:** + +1. This is normal; only happens once per CRL per `cacheTtl` period. +2. Subsequent connections will be fast (cached CRL). +3. Increase CRL timeout if downloads are slow: + ```yaml + crl: + timeout: 20000 # 20 seconds + ``` + +### Frequent CRL Downloads + +**Cause:** `cacheTtl` is too short, or the CRL's `nextUpdate` period is very short. + +**Solutions:** + +1. 
Increase `cacheTtl`: + ```yaml + crl: + cacheTtl: 172800000 # 48 hours + ``` +2. Increase `gracePeriod` to allow using slightly expired CRLs. + +### OCSP Responder Unavailable + +**Cause:** OCSP responder is down or unreachable. + +**Solutions:** + +1. CRL will be used as fallback automatically. +2. Use fail-open mode to allow connections: + ```yaml + ocsp: + failureMode: fail-open + ``` +3. Disable OCSP and rely on CRL only (ensure all certs have CRL URLs): + ```yaml + ocsp: false + ``` + +### Network or Firewall Blocking Outbound Requests + +**Cause:** Secure hosting environments often restrict outbound HTTP/HTTPS traffic. This prevents Harper from reaching CRL distribution points and OCSP responders. + +**Symptoms:** + +- Certificate verification timeouts in fail-closed mode +- Logs show connection failures to CRL/OCSP URLs +- First connection may succeed (no cached data), but subsequent connections fail after cache expires + +**Solutions:** + +1. **Allow outbound traffic to CA infrastructure** (recommended): + - Whitelist CRL distribution point URLs from your certificates + - Whitelist OCSP responder URLs from your certificates + - Example for Let's Encrypt: allow `http://x1.c.lencr.org/` and `http://ocsp.int-x3.letsencrypt.org/` + +2. **Use fail-open mode:** + + ```yaml + certificateVerification: + failureMode: fail-open + ``` + +3. **Set up an internal CRL mirror/proxy:** + + ```yaml + certificateVerification: + crl: + cacheTtl: 172800000 # 48 hours + ocsp: false + ``` + +4. **Disable verification** (if you have alternative security controls): + ```yaml + certificateVerification: false + ``` + +## Security Considerations + +Enable certificate verification when: + +- Certificates have long validity periods (> 1 day) +- You need immediate revocation capability +- Compliance requires revocation checking (PCI DSS, HIPAA, etc.) 
+- You're in a zero-trust security model +- Client certificates are used for API authentication + +Consider skipping it when: + +- Certificates have very short validity periods (< 24 hours) +- You rotate certificates automatically (e.g., with cert-manager) +- You have alternative revocation mechanisms +- Your CA doesn't publish CRLs or support OCSP + +Certificate verification is one layer of security. Also consider: short certificate validity periods, certificate pinning, network segmentation, access logging, and regular certificate rotation. + +## Replication + +Certificate verification works identically for replication servers. Use the `replication.mtls` configuration: + +```yaml +replication: + hostname: server-one + routes: + - server-two + mtls: + certificateVerification: true +``` + +mTLS is always required for replication and cannot be disabled. This configuration only controls whether certificate revocation checking is performed. + +For complete replication configuration, see [Replication Configuration](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering configuration'). diff --git a/reference_versioned_docs/version-v4/security/configuration.md b/reference_versioned_docs/version-v4/security/configuration.md new file mode 100644 index 00000000..758391e9 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/configuration.md @@ -0,0 +1,71 @@ +--- +id: configuration +title: Authentication Configuration +--- + + + +Harper's authentication system is configured via the top-level `authentication` section of `harperdb-config.yaml`. + +```yaml +authentication: + authorizeLocal: true + cacheTTL: 30000 + enableSessions: true + operationTokenTimeout: 1d + refreshTokenTimeout: 30d + hashFunction: sha256 +``` + +## Options + +### `authorizeLocal` + +_Type: boolean — Default: `true`_ + +Automatically authorizes requests from the loopback IP address (`127.0.0.1`) as the superuser, without requiring credentials. 
Disable this for any Harper server that may be accessed by untrusted users on the same machine — for example, when a local proxy forwards external traffic to Harper, or for general server hardening.
+ + + +## Related + +- [JWT Authentication](./jwt-authentication.md) +- [Basic Authentication](./basic-authentication.md) +- [Users & Roles / Configuration](../users-and-roles/configuration.md) diff --git a/reference_versioned_docs/version-v4/security/jwt-authentication.md b/reference_versioned_docs/version-v4/security/jwt-authentication.md new file mode 100644 index 00000000..8a35124c --- /dev/null +++ b/reference_versioned_docs/version-v4/security/jwt-authentication.md @@ -0,0 +1,118 @@ +--- +id: jwt-authentication +title: JWT Authentication +--- + +s + +Available since: v4.1.0 + +Harper supports token-based authentication using JSON Web Tokens (JWTs). Rather than sending credentials on every request, a client authenticates once and receives tokens that are used for subsequent requests. + +## Tokens + +JWT authentication uses two token types: + +- **`operation_token`** — Used to authenticate all Harper operations via a `Bearer` token `Authorization` header. Default expiry: 1 day. +- **`refresh_token`** — Used to obtain a new `operation_token` when the current one expires. Default expiry: 30 days. + +## Create Authentication Tokens + +Call `create_authentication_tokens` with your Harper credentials. No `Authorization` header is required for this operation. 
+ +```json +{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" +} +``` + +cURL example: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" + }' +``` + +Response: + +```json +{ + "operation_token": "", + "refresh_token": "" +} +``` + +## Using the Operation Token + +Pass the `operation_token` as a `Bearer` token in the `Authorization` header on subsequent requests: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data-raw '{ + "operation": "search_by_hash", + "schema": "dev", + "table": "dog", + "hash_values": [1], + "get_attributes": ["*"] + }' +``` + +## Refreshing the Operation Token + +When the `operation_token` expires, use the `refresh_token` to obtain a new one. Pass the `refresh_token` as the `Bearer` token: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data-raw '{ + "operation": "refresh_operation_token" + }' +``` + +Response: + +```json +{ + "operation_token": "" +} +``` + +When both tokens have expired, call `create_authentication_tokens` again with your username and password. + +## Token Expiry Configuration + +Token timeouts are configurable in `harperdb-config.yaml` under the top-level `authentication` section: + +```yaml +authentication: + operationTokenTimeout: 1d # Default: 1 day + refreshTokenTimeout: 30d # Default: 30 days +``` + +Valid duration string values follow the [`jsonwebtoken` package format](https://github.com/auth0/node-jsonwebtoken#token-expiration-exp-claim) (e.g., `1d`, `12h`, `60m`). See [Security / Configuration](./configuration.md) for the full authentication config reference. 
+ +## When to Use JWT Auth + +JWT authentication is preferred over Basic Auth when: + +- You want to avoid sending credentials on every request +- Your client can store and manage tokens +- You have multiple sequential requests and want to avoid repeated credential encoding + +For simple or server-to-server scenarios, see [Basic Authentication](./basic-authentication.md). + +## Security Notes + +- Always use HTTPS in production to protect tokens in transit. See [HTTP / TLS](../http/tls.md). +- Store tokens securely; treat them like passwords. +- If a token is compromised, it will remain valid until it expires. Consider setting shorter `operationTokenTimeout` values in high-security environments. diff --git a/reference_versioned_docs/version-v4/security/mtls-authentication.md b/reference_versioned_docs/version-v4/security/mtls-authentication.md new file mode 100644 index 00000000..82f5d22e --- /dev/null +++ b/reference_versioned_docs/version-v4/security/mtls-authentication.md @@ -0,0 +1,80 @@ +--- +id: mtls-authentication +title: mTLS Authentication +--- + + + + +Added in: v4.3.0 + +Harper supports Mutual TLS (mTLS) authentication for incoming HTTP connections. When enabled, the client must present a certificate signed by a trusted Certificate Authority (CA). If the certificate is valid and trusted, the connection is authenticated using the user whose username matches the `CN` (Common Name) from the client certificate's `subject`. + +## How It Works + +1. The client presents a TLS certificate during the handshake. +2. Harper validates the certificate against the configured CA (`tls.certificateAuthority`). +3. If valid, Harper extracts the `CN` from the certificate subject and uses it as the username for the request. + 1. Or it is configurable via the `http.mtls.user` option in the relevant configuration object. +4. Optionally, Harper checks whether the certificate has been revoked (see [Certificate Verification](./certificate-verification.md)). 
+ +## Configuration + +mTLS is configured via the `http.mtls` section in `harperdb-config.yaml`. + +**Require mTLS for all connections:** + +```yaml +http: + mtls: + required: true +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +**Make mTLS optional (accept both mTLS and non-mTLS connections):** + +```yaml +http: + mtls: + required: false +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +When `required` is `false`, clients that do not present a certificate will fall back to other authentication methods (Basic Auth or JWT). + +For more configuration information see the [HTTP / Configuration](../http/configuration.md) and [HTTP / TLS](../http/tls.md) sections. + +## Certificate Revocation Checking + +When using mTLS, you can optionally enable certificate revocation checking to ensure that revoked certificates cannot authenticate, even if they are otherwise valid and trusted. + +To enable: + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +Certificate revocation checking is **disabled by default** and must be explicitly enabled. For full details on CRL and OCSP configuration, see [Certificate Verification](./certificate-verification.md). + +## User Identity + +The username for the mTLS-authenticated request is derived from the `CN` field of the client certificate's subject. Ensure the CN value matches an existing Harper user account. See [Users and Roles](./users-and-roles.md) for managing user accounts. + +## Setup Requirements + +To use mTLS you need: + +1. A Certificate Authority (CA) certificate configured in `tls.certificateAuthority`. +2. Client certificates signed by that CA, with a `CN` matching a Harper username. +3. The `http.mtls` configuration enabled. + +For help generating and managing certificates, see [Certificate Management](./certificate-management.md). + +## Replication + +mTLS is always required for Harper replication and cannot be disabled. 
For replication-specific mTLS configuration, see [Replication Configuration](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering configuration'). diff --git a/reference_versioned_docs/version-v4/security/overview.md b/reference_versioned_docs/version-v4/security/overview.md new file mode 100644 index 00000000..1f3bb539 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/overview.md @@ -0,0 +1,51 @@ +--- +id: overview +title: Security +--- + + + + +Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they are supposed to be able to access. Granular permissions allow for unparalleled flexibility and control, and can lower the total cost of ownership compared to other database solutions, since you no longer need to replicate subsets of data to isolate use cases. + +## Security Philosophy + +Harper's security model has two distinct layers: + +**Authentication** determines _who_ is making a request. Harper validates each request using one of the methods above, then resolves the caller to a known Harper user account. + +**Authorization** determines _what_ the caller can do. Each Harper user is assigned a role. Roles carry a permissions set that grants or denies CRUD access at the table and attribute level, in addition to controlling access to system operations. + +For details on how roles and permissions work, see [Users and Roles](./users-and-roles.md). + +## Authentication Methods + +Harper supports three authentication methods: + +- [Basic Authentication](./basic-authentication.md) — Username and password sent as a Base64-encoded `Authorization` header on every request. +- [JWT Authentication](./jwt-authentication.md) — Token-based authentication using JSON Web Tokens. Clients authenticate once and receive short-lived operation tokens and longer-lived refresh tokens. +- [mTLS Authentication](./mtls-authentication.md) — Mutual TLS certificate-based authentication. 
+ +## Certificate Management + +- [Certificate Management](./certificate-management.md) — Managing TLS certificates and Certificate Authorities for HTTPS and mTLS. +- [Certificate Verification](./certificate-verification.md) — Certificate revocation checking via CRL and OCSP. + +## Access Control + +- CORS — Cross-Origin Resource Sharing. + - For HTTP server configuration see [HTTP / Configuration / CORS](../http/configuration.md#cors) + - For Operations API configuration see [Operations API / Configuration / Network](TODO: ../operations-api/configuration.md#network) +- SSL & HTTPS — Enabling HTTPS and configuring TLS for the HTTP server. + - For HTTP server configuration see [HTTP / Configuration / TLS](../http/tls.md) + - For Operations API configuration see [Operations API / Configuration / TLS](TODO: ../operations-api/configuration.md#tls) +- [Users and Roles](./users-and-roles.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. + +## Default Behavior + +Out of the box, Harper: + +- Generates self-signed TLS certificates at `/keys/` on first run. +- Runs with HTTPS disabled (HTTP only on port 9925 for the Operations API). It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. +- Enables CORS for all origins (configurable). +- Supports Basic Auth and JWT Auth by default; mTLS must be explicitly configured. diff --git a/reference_versioned_docs/version-v4/users-and-roles/configuration.md b/reference_versioned_docs/version-v4/users-and-roles/configuration.md new file mode 100644 index 00000000..8177180b --- /dev/null +++ b/reference_versioned_docs/version-v4/users-and-roles/configuration.md @@ -0,0 +1,67 @@ +--- +id: configuration +title: Configuration +--- + + + + + +## Managing Roles with Config Files + +In addition to managing roles via the Operations API, Harper supports declaring roles in a configuration file. 
When the application starts, Harper ensures all declared roles exist with the specified permissions. + +Configure in your application's `config.yaml`: + +```yaml +roles: + files: roles.yaml +``` + +Example `roles.yaml`: + +```yaml +analyst: + super_user: false + data: + Sales: + read: true + insert: false + update: false + delete: false + +editor: + data: + Articles: + read: true + insert: true + update: true + attributes: + title: + read: true + update: true + author: + read: true + update: false +``` + +**Startup behavior:** + +- If a declared role does not exist, Harper creates it. +- If a declared role already exists, Harper updates its permissions to match the definition. + +## Password Hashing + +Added in: v4.5.0 + +Harper supports two password hashing algorithms, replacing the previous MD5 hashing: + +- **`sha256`** — Default algorithm. Good security and excellent performance. +- **`argon2id`** — Highest security. More CPU-intensive; recommended for high-security environments. + +Password hashing is configured via the `authentication.hashFunction` key in `harperdb-config.yaml`. See [Security / Configuration](../security/configuration.md#hashfunction) for details. + +## Related + +- [Overview](./overview) +- [Operations](./operations) diff --git a/reference_versioned_docs/version-v4/users-and-roles/operations.md b/reference_versioned_docs/version-v4/users-and-roles/operations.md new file mode 100644 index 00000000..5dc7c56f --- /dev/null +++ b/reference_versioned_docs/version-v4/users-and-roles/operations.md @@ -0,0 +1,176 @@ +--- +id: operations +title: Operations +--- + + + +## Roles + +### List Roles + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "list_roles" +} +``` + +### Add Role + +_Restricted to `super_user` roles._ + +- `role` _(required)_ — Name for the new role. +- `permission` _(required)_ — Permissions object. See [Permission Structure](./overview#permission-structure). 
+ - `super_user` _(optional)_ — If `true`, grants full access. Defaults to `false`. + - `structure_user` _(optional)_ — Boolean or array of database names. If `true`, can create/drop databases and tables. If array, limited to specified databases. + +```json +{ + "operation": "add_role", + "role": "developer", + "permission": { + "super_user": false, + "structure_user": false, + "dev": { + "tables": { + "dog": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [ + { + "attribute_name": "name", + "read": true, + "insert": true, + "update": true + } + ] + } + } + } + } +} +``` + +### Alter Role + +_Restricted to `super_user` roles._ + +- `id` _(required)_ — The `id` of the role to alter (from `list_roles`). +- `role` _(optional)_ — New name for the role. +- `permission` _(required)_ — Updated permissions object. + +```json +{ + "operation": "alter_role", + "id": "f92162e2-cd17-450c-aae0-372a76859038", + "role": "another_developer", + "permission": { + "super_user": false, + "structure_user": false, + "dev": { + "tables": { + "dog": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [] + } + } + } + } +} +``` + +### Drop Role + +_Restricted to `super_user` roles. Roles with associated users cannot be dropped._ + +- `id` _(required)_ — The `id` of the role to drop. + +```json +{ + "operation": "drop_role", + "id": "developer" +} +``` + +## Users + +### List Users + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "list_users" +} +``` + +### User Info + +Returns user data for the currently authenticated user. Available to all roles. + +```json +{ + "operation": "user_info" +} +``` + +### Add User + +_Restricted to `super_user` roles._ + +- `role` _(required)_ — Role name to assign. +- `username` _(required)_ — Username. Cannot be changed after creation. +- `password` _(required)_ — Plain-text password. Harper encrypts it on receipt. 
+- `active` _(required)_ — Boolean. If `false`, user cannot access Harper. + +```json +{ + "operation": "add_user", + "role": "role_name", + "username": "hdb_user", + "password": "password", + "active": true +} +``` + +### Alter User + +_Restricted to `super_user` roles._ + +- `username` _(required)_ — Username to modify. +- `password` _(optional)_ — New password. +- `role` _(optional)_ — New role name. +- `active` _(optional)_ — Enable/disable user access. + +```json +{ + "operation": "alter_user", + "role": "role_name", + "username": "hdb_user", + "password": "new_password", + "active": true +} +``` + +### Drop User + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "drop_user", + "username": "harper" +} +``` + +## Related + +- [Overview](./overview) +- [Configuration](./configuration) diff --git a/reference_versioned_docs/version-v4/users-and-roles/overview.md b/reference_versioned_docs/version-v4/users-and-roles/overview.md new file mode 100644 index 00000000..bbb99cdc --- /dev/null +++ b/reference_versioned_docs/version-v4/users-and-roles/overview.md @@ -0,0 +1,253 @@ +--- +id: overview +title: Users & Roles +--- + + + + +Harper uses a Role-Based Access Control (RBAC) framework to manage access to Harper instances. Each user is assigned a role that determines their permissions to access database resources and run operations. + +## Roles + +Role permissions in Harper are divided into two categories: + +**Database Manipulation** — CRUD (create, read, update, delete) permissions against database data (tables and attributes). + +**Database Definition** — Permissions to manage databases, tables, roles, users, and other system settings. These are restricted to the built-in `super_user` role. 
+ +### Built-In Roles + +| Role | Description | +| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| `super_user` | Full access to all operations and methods. The admin role. | +| `structure_user` | Access to create and delete databases and tables. Can be set to `true` (all databases) or an array of database names (specific databases only). | + +### User-Defined Roles + +Admins (`super_user` users) can create custom roles with explicit permissions on specific tables and attributes. + +- Unless a user-defined role has `super_user: true`, all permissions must be defined explicitly. +- Any table or database not included in the role's permission set will be inaccessible. +- `describe` operations return metadata only for databases, tables, and attributes that the role has CRUD permissions for. + +## Permission Structure + +When creating or altering a role, you define a `permission` object: + +```json +{ + "operation": "add_role", + "role": "software_developer", + "permission": { + "super_user": false, + "database_name": { + "tables": { + "table_name1": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [ + { + "attribute_name": "attribute1", + "read": true, + "insert": true, + "update": true + } + ] + }, + "table_name2": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [] + } + } + } + } +} +``` + +### Table Permissions + +Each table entry defines CRUD access: + +```jsonc +{ + "table_name": { + "read": boolean, // Access to read from this table + "insert": boolean, // Access to insert data + "update": boolean, // Access to update data + "delete": boolean, // Access to delete rows + "attribute_permissions": [ + { + "attribute_name": "attribute_name", + "read": boolean, + "insert": boolean, + "update": boolean + // Note: "delete" is not an attribute-level 
permission + } + ] + } +} +``` + +### Important Rules + +**Table-level:** + +- If a database or table is not included in the permissions, the role has no access to it. +- If a table-level CRUD permission is `false`, setting the same CRUD permission to `true` on an attribute returns an error. + +**Attribute-level:** + +- If `attribute_permissions` is a non-empty array, only the listed attributes are accessible (plus the table's hash attribute — see below). +- If `attribute_permissions` is empty (`[]`), attribute access follows the table-level CRUD permissions. +- If any non-hash attribute is given CRUD access, the table's `hash_attribute` (primary key) automatically receives the same access, even if not explicitly listed. +- Any attribute not explicitly listed in a non-empty `attribute_permissions` array has no access. +- `DELETE` is not an attribute-level permission. Deleting rows is controlled at the table level. +- The `__createdtime__` and `__updatedtime__` attributes managed by Harper can have `read` permissions set; other attribute-level permissions for these fields are ignored. + +## Role-Based Operation Restrictions + +The following table shows which operations are restricted to `super_user` roles. Non-`super_user` roles are also restricted within their accessible operations by their CRUD permission set. 
+ +### Databases and Tables + +| Operation | Restricted to Super User | +| ------------------- | :----------------------: | +| `describe_all` | | +| `describe_database` | | +| `describe_table` | | +| `create_database` | X | +| `drop_database` | X | +| `create_table` | X | +| `drop_table` | X | +| `create_attribute` | | +| `drop_attribute` | X | + +### NoSQL Operations + +| Operation | Restricted to Super User | +| ---------------------- | :----------------------: | +| `insert` | | +| `update` | | +| `upsert` | | +| `delete` | | +| `search_by_hash` | | +| `search_by_value` | | +| `search_by_conditions` | | + +### SQL Operations + +| Operation | Restricted to Super User | +| --------- | :----------------------: | +| `select` | | +| `insert` | | +| `update` | | +| `delete` | | + +### Bulk Operations + +| Operation | Restricted to Super User | +| ---------------- | :----------------------: | +| `csv_data_load` | | +| `csv_file_load` | | +| `csv_url_load` | | +| `import_from_s3` | | + +### Users and Roles + +| Operation | Restricted to Super User | +| ------------ | :----------------------: | +| `list_roles` | X | +| `add_role` | X | +| `alter_role` | X | +| `drop_role` | X | +| `list_users` | X | +| `user_info` | | +| `add_user` | X | +| `alter_user` | X | +| `drop_user` | X | + +### Clustering + +| Operation | Restricted to Super User | +| ----------------------- | :----------------------: | +| `cluster_set_routes` | X | +| `cluster_get_routes` | X | +| `cluster_delete_routes` | X | +| `add_node` | X | +| `update_node` | X | +| `cluster_status` | X | +| `remove_node` | X | +| `configure_cluster` | X | + +### Components + +| Operation | Restricted to Super User | +| -------------------- | :----------------------: | +| `get_components` | X | +| `get_component_file` | X | +| `set_component_file` | X | +| `drop_component` | X | +| `add_component` | X | +| `package_component` | X | +| `deploy_component` | X | + +### Registration + +| Operation | Restricted to Super User | 
+| ------------------- | :----------------------: | +| `registration_info` | | +| `get_fingerprint` | X | +| `set_license` | X | + +### Jobs + +| Operation | Restricted to Super User | +| --------------------------- | :----------------------: | +| `get_job` | | +| `search_jobs_by_start_date` | X | + +### Logs + +| Operation | Restricted to Super User | +| -------------------------------- | :----------------------: | +| `read_log` | X | +| `read_transaction_log` | X | +| `delete_transaction_logs_before` | X | +| `read_audit_log` | X | +| `delete_audit_logs_before` | X | + +### Utilities + +| Operation | Restricted to Super User | +| ----------------------- | :----------------------: | +| `delete_records_before` | X | +| `export_local` | X | +| `export_to_s3` | X | +| `system_information` | X | +| `restart` | X | +| `restart_service` | X | +| `get_configuration` | X | + +### Token Authentication + +| Operation | Restricted to Super User | +| ------------------------------ | :----------------------: | +| `create_authentication_tokens` | | +| `refresh_operation_token` | | + +## Troubleshooting: "Must execute as User" + +If you see the error `Error: Must execute as <user>`, it means Harper was installed as a specific OS user and must be run by that same user. Harper stores files natively on the operating system and only allows the Harper executable to be run by a single user — this prevents file permission issues and keeps the installation secure. + +To resolve: run Harper with the same OS user account used during installation. 
+ +## Related + +- [Configuration](./configuration) +- [Operations](./operations) diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 62cbab66..a7d64c80 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -190,6 +190,72 @@ } ] }, + { + "type": "category", + "label": "Security", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "security/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "security/configuration", + "label": "Configuration" + }, + { + "type": "doc", + "id": "security/basic-authentication", + "label": "Basic Authentication" + }, + { + "type": "doc", + "id": "security/jwt-authentication", + "label": "JWT Authentication" + }, + { + "type": "doc", + "id": "security/mtls-authentication", + "label": "mTLS Authentication" + }, + { + "type": "doc", + "id": "security/certificate-management", + "label": "Certificate Management" + }, + { + "type": "doc", + "id": "security/certificate-verification", + "label": "Certificate Verification" + } + ] + }, + { + "type": "category", + "label": "Users & Roles", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "users-and-roles/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "users-and-roles/configuration", + "label": "Configuration" + }, + { + "type": "doc", + "id": "users-and-roles/operations", + "label": "Operations" + } + ] + }, { "type": "category", "label": "Legacy", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 2b13f37d..c67b6985 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -124,28 +124,28 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/security/index.md` - **Additional 
Sources**: - `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: Not Started +- **Status**: In Progress ### `reference/security/basic-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/basic-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/basic-authentication.md` - **Version Annotations**: Available since v4.1.0 -- **Status**: Not Started +- **Status**: In Progress ### `reference/security/jwt-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/jwt-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/jwt.md` - **Version Annotations**: Available since v4.1.0 -- **Status**: Not Started +- **Status**: In Progress ### `reference/security/mtls-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/mtls-auth.md` - **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` - **Version Annotations**: Added in v4.3.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS support added @@ -157,7 +157,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - `versioned_docs/version-4.4+` (dynamic cert management added) - **Merge Required**: Yes - dynamic certificate management added in v4.4 - **Version Annotations**: Dynamic certs added v4.4.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Dynamic certificate management - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Certificate revocation @@ -166,19 +166,19 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-verification.md` - **Version Annotations**: Added in v4.7.0 (OCSP support) -- **Status**: Not Started +- **Status**: In Progress - 
**Release Notes**: - [4.7.0](release-notes/v4-tucker/4.7.0.md) - OCSP support ### `reference/security/cors.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: Not Started +- **Status**: In Progress ### `reference/security/ssl.md` - **Primary Source**: Extract from security/configuration or certificate management docs -- **Status**: Not Started +- **Status**: In Progress ### `reference/security/users-and-roles.md` @@ -188,7 +188,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - `versioned_docs/version-4.7/reference/roles.md` - Current `reference/defining-roles.md` - **Merge Required**: Yes - content spread across multiple files -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Cookie-based sessions diff --git a/v4-docs-reference-plan.md b/v4-docs-reference-plan.md index 6b03eb9b..983c3711 100644 --- a/v4-docs-reference-plan.md +++ b/v4-docs-reference-plan.md @@ -229,11 +229,20 @@ reference/ │ │ │ ├── certificate-verification.md # Certificate verification (OCSP, etc.) │ │ -│ ├── cors.md # CORS configuration and usage +│ └── configuration.md # Authentication configuration (authorizeLocal, cacheTTL, +│ # enableSessions, token timeouts, hashFunction). Top-level +│ # `authentication:` section of harperdb-config.yaml. +│ +├── users-and-roles/ # Broken out from security/ during migration — RBAC warrants +│ │ # its own top-level section given the breadth of content +│ │ # (operations API, config file roles, permission structure). 
+│ │ +│ ├── overview.md # RBAC intro, roles, permission structure, operation +│ │ # restrictions reference table │ │ -│ ├── ssl.md # SSL/TLS configuration +│ ├── configuration.md # Config file roles (roles.yaml), password hashing │ │ -│ └── users-and-roles.md # User and role management including `roles` plugin +│ └── operations.md # Operations API: all role and user operations │ ├── components/ │ ├── overview.md # What are components? Evolution from custom functions to From 372eec37fa63d3a0e562b66326dd745132f1d5e7 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 17 Mar 2026 11:33:19 -0600 Subject: [PATCH 20/51] update plan docs --- v4-docs-implementation-plan.md | 51 +++++++++++++----------- v4-docs-migration-map.md | 71 ++++++++++++++++++++++------------ 2 files changed, 75 insertions(+), 47 deletions(-) diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index c0a5041e..42ce6c11 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -220,66 +220,71 @@ Updated status for this section to "In Progress" Based on migration map and reference plan, recommend this order. Each section is generated as a complete unit with all its pages at once: -**Phase 1A - Simple, Stable Sections** +**Phase 1A - Simple, Stable Sections** — **Complete** -1. **CLI** (`reference_versioned_docs/version-v4/cli/`) +1. **CLI** (`reference_versioned_docs/version-v4/cli/`) — **Complete** - `overview.md` - `commands.md` - `operations-api-commands.md` - `authentication.md` -2. **GraphQL Querying** (`reference_versioned_docs/version-v4/graphql-querying/`) +2. **GraphQL Querying** (`reference_versioned_docs/version-v4/graphql-querying/`) — **Complete** - `overview.md` -3. **Studio** (`reference_versioned_docs/version-v4/studio/`) +3. **Studio** (`reference_versioned_docs/version-v4/studio/`) — **Complete** - `overview.md` - Simple page covering local Studio UI configuration and access -4. 
**Fastify Routes** (`reference_versioned_docs/version-v4/fastify-routes/`) +4. **Fastify Routes** (`reference_versioned_docs/version-v4/fastify-routes/`) — **Complete** - `overview.md` -**Phase 1B - Medium Complexity** +**Phase 1B - Medium Complexity** — **Complete** -1. **Environment Variables** (`reference_versioned_docs/version-v4/environment-variables/`) +1. **Environment Variables** (`reference_versioned_docs/version-v4/environment-variables/`) — **Complete** - `overview.md` - - `configuration.md` + - ~~`configuration.md`~~ _(not created — content to be ported into `configuration/overview.md`)_ -2. **Static Files** (`reference_versioned_docs/version-v4/static-files/`) +2. **Static Files** (`reference_versioned_docs/version-v4/static-files/`) — **Complete** - `overview.md` - - `configuration.md` + - ~~`configuration.md`~~ _(not needed — all options documented inline in overview)_ -3. **HTTP** (`reference_versioned_docs/version-v4/http/`) +3. **HTTP** (`reference_versioned_docs/version-v4/http/`) — **Complete** - `overview.md` - `configuration.md` - `api.md` + - `tls.md` _(added during migration — TLS config warranted its own page)_ -4. **MQTT** (`reference_versioned_docs/version-v4/mqtt/`) +4. **MQTT** (`reference_versioned_docs/version-v4/mqtt/`) — **Complete** - `overview.md` - `configuration.md` -5. **Logging** (`reference_versioned_docs/version-v4/logging/`) +5. **Logging** (`reference_versioned_docs/version-v4/logging/`) — **Complete** - `overview.md` - `configuration.md` - `api.md` - `operations.md` -6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) +6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) — **Complete** - `overview.md` - `operations.md` **Phase 1C - Complex Sections** -1. **Security** (`reference_versioned_docs/version-v4/security/`) +1. 
**Security** (`reference_versioned_docs/version-v4/security/`) — **Complete** - `overview.md` - `basic-authentication.md` - `jwt-authentication.md` - `mtls-authentication.md` - `certificate-management.md` - `certificate-verification.md` - - `cors.md` - - `ssl.md` - - `users-and-roles.md` + - `configuration.md` _(consolidated from planned `cors.md` + `ssl.md`)_ + +2. **Users and Roles** (`reference_versioned_docs/version-v4/users-and-roles/`) — **Complete** + - `overview.md` + - `configuration.md` + - `operations.md` + - _Note: Broken out from Security section during migration; RBAC content warranted its own top-level section._ -2. **REST** (`reference_versioned_docs/version-v4/rest/`) +3. **REST** (`reference_versioned_docs/version-v4/rest/`) - `overview.md` - `querying.md` - `headers.md` @@ -287,7 +292,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `websockets.md` - `server-sent-events.md` -3. **Database** (`reference_versioned_docs/version-v4/database/`) +4. **Database** (`reference_versioned_docs/version-v4/database/`) - `overview.md` - `schema.md` - `data-loader.md` @@ -297,19 +302,19 @@ Based on migration map and reference plan, recommend this order. Each section is - `compaction.md` - `transaction.md` -4. **Resources** (`reference_versioned_docs/version-v4/resources/`) +5. **Resources** (`reference_versioned_docs/version-v4/resources/`) - `overview.md` - `resource-api.md` - `global-apis.md` - `query-optimization.md` -5. **Components** (`reference_versioned_docs/version-v4/components/`) +6. **Components** (`reference_versioned_docs/version-v4/components/`) - `overview.md` - `applications.md` - `extension-api.md` - `plugin-api.md` -6. **Replication** (`reference_versioned_docs/version-v4/replication/`) +7. 
**Replication** (`reference_versioned_docs/version-v4/replication/`) - `overview.md` - `clustering.md` - `sharding.md` diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index c67b6985..0f9349b3 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -124,28 +124,28 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/security/index.md` - **Additional Sources**: - `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: In Progress +- **Status**: Complete ### `reference/security/basic-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/basic-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/basic-authentication.md` - **Version Annotations**: Available since v4.1.0 -- **Status**: In Progress +- **Status**: Complete ### `reference/security/jwt-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/jwt-auth.md` - **Additional Sources**: `versioned_docs/version-4.1/security/jwt.md` - **Version Annotations**: Available since v4.1.0 -- **Status**: In Progress +- **Status**: Complete ### `reference/security/mtls-authentication.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/mtls-auth.md` - **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` - **Version Annotations**: Added in v4.3.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS support added @@ -157,7 +157,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - `versioned_docs/version-4.4+` (dynamic cert management added) - **Merge Required**: Yes - dynamic certificate management added in v4.4 - **Version Annotations**: Dynamic certs added v4.4.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - 
[4.4.0](release-notes/v4-tucker/4.4.0.md) - Dynamic certificate management - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Certificate revocation @@ -166,33 +166,49 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-verification.md` - **Version Annotations**: Added in v4.7.0 (OCSP support) -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.7.0](release-notes/v4-tucker/4.7.0.md) - OCSP support -### `reference/security/cors.md` +### `reference/security/configuration.md` -- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: In Progress +- **Primary Source**: `versioned_docs/version-4.7/developers/security/configuration.md` +- **Status**: Complete +- **Notes**: Covers authentication configuration (authorizeLocal, cacheTTL, enableSessions, token timeouts, hashFunction), CORS, and SSL/TLS settings. Originally planned as separate `cors.md` and `ssl.md` pages; consolidated into a single `configuration.md` during migration. -### `reference/security/ssl.md` +--- -- **Primary Source**: Extract from security/configuration or certificate management docs -- **Status**: In Progress +## Users and Roles Section -### `reference/security/users-and-roles.md` +Broken out from the security section during migration — RBAC warrants its own top-level section given the breadth of content (operations API, config file roles, permission structure). 
+ +### `reference/users-and-roles/overview.md` - **Primary Source**: `versioned_docs/version-4.7/developers/security/users-and-roles.md` - **Additional Sources**: - - `versioned_docs/version-4.7/developers/operations-api/users-and-roles.md` - `versioned_docs/version-4.7/reference/roles.md` - Current `reference/defining-roles.md` - **Merge Required**: Yes - content spread across multiple files -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Cookie-based sessions +### `reference/users-and-roles/configuration.md` + +- **Primary Source**: `versioned_docs/version-4.7/reference/roles.md` +- **Additional Sources**: `versioned_docs/version-4.7/developers/security/configuration.md` +- **Status**: Complete +- **Notes**: Config file roles (roles.yaml), password hashing + +### `reference/users-and-roles/operations.md` + +- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/users-and-roles.md` +- **Status**: Complete +- **Notes**: Operations API — all role and user operations +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) + --- ## Components Section @@ -412,7 +428,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Built-in extensions docs, configuration docs - **Version Annotations**: loadEnv added in v4.5.0 - **Status**: Complete -- **Notes**: Covers `loadEnv` extension only. Harper-level environment variable configuration (naming conventions, `HDB_CONFIG`, `HARPER_DEFAULT_CONFIG`, `HARPER_SET_CONFIG`) belongs in the Configuration section — see notes there. +- **Notes**: Covers `loadEnv` extension only. 
Harper-level environment variable configuration (naming conventions, `HDB_CONFIG`, `HARPER_DEFAULT_CONFIG`, `HARPER_SET_CONFIG`) belongs in the Configuration section — see notes there. The originally planned `configuration.md` sub-page was not created; that content is to be ported into `configuration/overview.md` (see Configuration section notes). - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component @@ -464,6 +480,13 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - server.authenticateUser API +### `reference/http/tls.md` + +- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/` (TLS/certificate configuration) +- **Additional Sources**: `versioned_docs/version-4.7/deployments/configuration.md` (tls config section) +- **Status**: Complete +- **Notes**: Created during migration as a dedicated TLS configuration reference for the HTTP server. Originally not in the plan (TLS was expected to be in security section); added as a separate HTTP sub-page given the close relationship to HTTP configuration. 
+ --- ## REST Section @@ -531,7 +554,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - mTLS support: v4.3.0 - Single-level wildcards: v4.3.0 - CRDT: v4.3.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - MQTT support introduced (QoS 0 and 1, durable sessions) - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS, single-level wildcards, retain handling, CRDT @@ -541,7 +564,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: Extract from configuration docs and real-time docs - **Version Annotations**: Port change v4.5.0 (9925 → 9933) -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Default replication port change @@ -553,7 +576,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/administration/logging/index.md` - **Additional Sources**: Current `reference/logging.md` (if exists) -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Logging revamped, consolidated into hdb.log - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Major logging improvements @@ -564,21 +587,21 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Per-component logging: v4.6.0 - Granular configuration: v4.6.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Per-component logging, dynamic reloading, HTTP logging ### `reference/logging/api.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Logger based on Node.js Console API ### 
`reference/logging/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` -- **Status**: In Progress +- **Status**: Complete - **Notes**: Operations for managing standard logs (not transaction/audit logs, which moved to database section) --- @@ -592,7 +615,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Version Annotations**: - Resource analytics: v4.5.0 - Storage analytics: v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Resource and storage analytics - [4.7.0](release-notes/v4-tucker/4.7.0.md) - New analytics and licensing functionality @@ -600,7 +623,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X ### `reference/analytics/operations.md` - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` -- **Status**: In Progress +- **Status**: Complete --- From 77b21204ee0562b84d20f6a39474d0f2e504b9cf Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 17 Mar 2026 13:14:43 -0600 Subject: [PATCH 21/51] correct links --- .../analytics-link-placeholders.md | 37 +++++----------- .../cli-link-placeholders.md | 42 +++++++------------ .../graphql-querying-link-placeholders.md | 13 +++--- .../http-link-placeholders.md | 37 +++++++--------- .../logging-link-placeholders.md | 31 +++++--------- .../mqtt-link-placeholders.md | 27 ++++-------- .../security-link-placeholders.md | 10 +++-- .../studio-link-placeholders.md | 4 ++ .../version-v4/analytics/overview.md | 10 ++--- .../version-v4/cli/authentication.md | 8 ++-- .../version-v4/cli/operations-api-commands.md | 4 +- .../version-v4/graphql-querying/overview.md | 4 +- .../version-v4/http/api.md | 2 +- .../version-v4/http/configuration.md | 2 +- .../version-v4/http/overview.md | 4 +- .../version-v4/logging/configuration.md | 2 +- .../version-v4/mqtt/configuration.md | 6 +-- 
.../version-v4/mqtt/overview.md | 4 +- .../security/basic-authentication.md | 2 +- .../security/mtls-authentication.md | 2 +- .../version-v4/security/overview.md | 8 ++-- .../version-v4/studio/overview.md | 2 +- 22 files changed, 105 insertions(+), 156 deletions(-) diff --git a/migration-context/link-placeholders/analytics-link-placeholders.md b/migration-context/link-placeholders/analytics-link-placeholders.md index 15a40ca0..aaaeab94 100644 --- a/migration-context/link-placeholders/analytics-link-placeholders.md +++ b/migration-context/link-placeholders/analytics-link-placeholders.md @@ -2,40 +2,25 @@ ## reference_versioned_docs/version-v4/analytics/overview.md -- Line 99: `[server.recordAnalytics()](TODO:reference_versioned_docs/version-v4/http/api.md)` - - Context: Noting that applications can record custom metrics via this API - - Target should be: HTTP API page, `server.recordAnalytics` section (file already exists) - -- Line 103: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)` - - Context: Related section — custom metrics API - - Target should be: HTTP API page (file already exists) - -- Line 105: `[analytics.logging](TODO:reference_versioned_docs/version-v4/logging/configuration.md)` - - Context: Per-component analytics logging configuration - - Target should be: Logging configuration page (already migrated in PR #450) - -- Line 106: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: analytics.aggregatePeriod configuration - - Target should be: Configuration section overview page - -- Line 110: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)` - - Context: Related section at bottom of file - - Target should be: HTTP API page (file already exists) - -- Line 111: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)` - - Context: Related section at bottom of file - - Target should be: Logging configuration page (already migrated 
in PR #450) - -- Line 112: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Related section at bottom of file +- ~~Line 99: `[server.recordAnalytics()](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md#serverrecordanalyticsvalue-metric-path-method-type` +- ~~Line 103: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md` +- ~~Line 105: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)`~~ **RESOLVED** → `../logging/configuration.md` +- ~~Line 110: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md` +- ~~Line 111: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)`~~ **RESOLVED** → `../logging/configuration.md` + +- Line 106 + 112: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: analytics.aggregatePeriod configuration + related section - Target should be: Configuration section overview page + - **Status**: PENDING (Configuration section migration) ## reference_versioned_docs/version-v4/analytics/operations.md - Line 56: `[search_by_conditions](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` - Context: Note that `conditions` parameter uses the same format as search_by_conditions - Target should be: Operations API operations page + - **Status**: PENDING (Operations API section migration) - Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - Context: Related section at bottom of file - Target should be: Operations API section overview page + - **Status**: PENDING (Operations API section migration) diff --git a/migration-context/link-placeholders/cli-link-placeholders.md b/migration-context/link-placeholders/cli-link-placeholders.md index 49653d73..fb3f79d3 
100644 --- a/migration-context/link-placeholders/cli-link-placeholders.md +++ b/migration-context/link-placeholders/cli-link-placeholders.md @@ -190,13 +190,13 @@ This document tracks all link placeholders in the CLI section that need to be re - Line 196: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles documentation")` - Context: Linking to user management and permissions (Security Best Practices section) - - Target: Security section users and roles page - - **Status**: PENDING (will be created in Security section migration) + - Target: Users and Roles section overview (moved to top-level section) + - **Status**: RESOLVED → `../users-and-roles/overview.md` - Line 204: `[Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md "Logging documentation")` - Context: Linking to audit logging information (Security Best Practices section) - Target: Logging section overview - - **Status**: PENDING (will be created in Logging section migration) + - **Status**: RESOLVED → `../logging/overview.md` - Line 256: `[CLI Overview](./overview.md)` - Context: See Also section @@ -216,24 +216,25 @@ This document tracks all link placeholders in the CLI section that need to be re - Line 259: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md "Security overview")` - Context: See Also section - Target: Security section overview page - - **Status**: PENDING (will be created in Security section migration) + - **Status**: RESOLVED → `../security/overview.md` - Line 260: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles")` - Context: See Also section - - Target: Security section users and roles page + - Target: Users and Roles section overview (moved to top-level section) + - **Status**: RESOLVED → `../users-and-roles/overview.md` ## Summary -### 
Resolved Links (Within CLI Section) +### Resolved Links -- 12 links to pages within the CLI section (all resolved in this migration) -- All internal CLI section links now use relative paths (e.g., `./overview.md`) +- 12 links to pages within the CLI section (resolved in initial migration) +- `../logging/overview.md` — resolved +- `../security/overview.md` — resolved (×2) +- `../users-and-roles/overview.md` — resolved (×2, was `security/users-and-roles.md`) +- `../graphql-querying/overview.md` — resolved (×1, in operations-api-commands.md) ### Pending Links (Cross-Section References) -These will be resolved in future section migrations: - **Operations API Section** (~82 links): - `reference_versioned_docs/version-v4/operations-api/overview.md` (5 occurrences) @@ -262,15 +263,6 @@ These will be resolved in future section migrations: - `reference_versioned_docs/version-v4/database/compaction.md` (2 occurrences) - `reference_versioned_docs/version-v4/database/overview.md` (1 occurrence) -**Security Section** (4 links): - -- `reference_versioned_docs/version-v4/security/overview.md` (2 occurrences) -- `reference_versioned_docs/version-v4/security/users-and-roles.md` (2 occurrences) - -**Logging Section** (1 link): - -- `reference_versioned_docs/version-v4/logging/overview.md` - **Components Section** (1 link): - `reference_versioned_docs/version-v4/components/overview.md` @@ -279,16 +271,12 @@ These will be resolved in future section migrations: - `reference_versioned_docs/version-v4/rest/overview.md` -**GraphQL Querying Section** (1 link): - -- `reference_versioned_docs/version-v4/graphql-querying/overview.md` - -**Applications Section** (1 link): +**Applications / Components Section** (1 link): -- `reference_versioned_docs/version-v4/applications/overview.md` +- `reference_versioned_docs/version-v4/applications/overview.md` (note: this should likely be `components/overview.md`) **Learn Guides** (1 link): - Deploying Harper Applications guide (external learn link) 
-**Total Pending Links**: ~96 +**Total Pending Links**: ~89 diff --git a/migration-context/link-placeholders/graphql-querying-link-placeholders.md b/migration-context/link-placeholders/graphql-querying-link-placeholders.md index 553c87d0..794837f6 100644 --- a/migration-context/link-placeholders/graphql-querying-link-placeholders.md +++ b/migration-context/link-placeholders/graphql-querying-link-placeholders.md @@ -5,13 +5,10 @@ - Line 17: `[defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md "Schema definition documentation")` - Context: Introduction explaining GraphQL support for defining schemas - Target should be: Schema definition documentation in Components/Applications section + - **Status**: PENDING (Components section migration) -- Line 17: `[Resources](./resources/overview.md)` - - Context: Introduction explaining GraphQL support for querying Resources - - Target should be: Resources overview page - - Note: This is a relative link within section, but Resources section not yet migrated +- ~~Line 17: `[Resources](./resources/overview.md)`~~ **FIXED** → `TODO:reference_versioned_docs/version-v4/resources/overview.md` (was a broken relative path — resources is a sibling section, not a subdirectory) + - **Status**: PENDING (Resources section migration) -- Line 58: `[Resource Query API](./resources/overview.md#query)` - - Context: Discussing query patterns and reference to Resource Query API - - Target should be: Query section of Resources overview - - Note: This is a relative link, but Resources section not yet migrated +- ~~Line 58: `[Resource Query API](./resources/overview.md#query)`~~ **FIXED** → `TODO:reference_versioned_docs/version-v4/resources/overview.md#query` (was a broken relative path) + - **Status**: PENDING (Resources section migration) diff --git a/migration-context/link-placeholders/http-link-placeholders.md b/migration-context/link-placeholders/http-link-placeholders.md index b114a76f..9227efd1 100644 --- 
a/migration-context/link-placeholders/http-link-placeholders.md +++ b/migration-context/link-placeholders/http-link-placeholders.md @@ -2,56 +2,47 @@ ## reference_versioned_docs/version-v4/http/tls.md -- Line (intro): `[Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md)` - - Context: Noting that operationsApi.tls overrides the root tls section +- Line (intro + body): `[Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md)` + - Context: Noting that operationsApi.tls overrides the root tls section (appears twice) - Target should be: Configuration section operations.md page + - **Status**: PENDING (Configuration section migration) -- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Certificate management, mTLS, and other security topics - - Target should be: Security section overview page +- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **NOTE**: Not present in file — Related section links to `../security/mtls-authentication.md` which was already resolved. 
## reference_versioned_docs/version-v4/http/overview.md -- Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Directing readers to certificate management details - - Target should be: Security section overview page +- ~~Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` +- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - Line (Related section): `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - Context: Related reference for REST protocol - Target should be: REST section overview page - -- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Related reference for security/TLS/mTLS - - Target should be: Security section overview page + - **Status**: PENDING (REST section migration) ## reference_versioned_docs/version-v4/http/configuration.md -- Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Directing readers to full certificate management details - - Target should be: Security section overview page - -- Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Related reference for TLS/mTLS configuration - - Target should be: Security section overview page +- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - Line (Related section): `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - Context: Full configuration reference - Target should be: Configuration section overview page + - **Status**: PENDING (Configuration section migration) ## 
reference_versioned_docs/version-v4/http/api.md +- ~~Line (server.recordAnalytics): `[analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md)`~~ **RESOLVED** → `../analytics/overview.md` + - Line (server.operation): `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - Context: Link to operations API overview - Target should be: Operations API overview page - -- Line (server.recordAnalytics): `[analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` - - Context: Link to analytics reference - - Target should be: Analytics overview page + - **Status**: PENDING (Operations API section migration) - Line (Related section): `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - Context: Related reference for REST protocol - Target should be: REST section overview page + - **Status**: PENDING (REST section migration) - Line (Related section): `[Global APIs](TODO:reference_versioned_docs/version-v4/resources/global-apis.md)` - Context: Full global API reference including tables, databases, Resource, logger, auth - Target should be: Resources global-apis page + - **Status**: PENDING (Resources section migration) diff --git a/migration-context/link-placeholders/logging-link-placeholders.md b/migration-context/link-placeholders/logging-link-placeholders.md index 212a0fc7..9bd31e66 100644 --- a/migration-context/link-placeholders/logging-link-placeholders.md +++ b/migration-context/link-placeholders/logging-link-placeholders.md @@ -2,42 +2,33 @@ ## reference_versioned_docs/version-v4/logging/overview.md -- Line 14: `[Database](TODO:reference_versioned_docs/version-v4/database/transaction.md)` +- Line 14 + 62: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - Context: Noting that audit logging and transaction logging are documented in the database section - Target should be: Database transaction/audit logging page - -- 
Line 62: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: Related section at bottom of file - - Target should be: Database transaction/audit logging page + - **Status**: PENDING (Database section migration) ## reference_versioned_docs/version-v4/logging/configuration.md -- Line 72: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: After describing logging.auditLog, pointing to where audit log details live +- Line 72 + 192: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: After describing logging.auditLog / related section - Target should be: Database transaction/audit logging page + - **Status**: PENDING (Database section migration) -- Line 133: `[HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md)` - - Context: Referencing HTTP logging config in the per-component section - - Target should be: HTTP configuration page (already exists: reference_versioned_docs/version-v4/http/configuration.md) - -- Line 192: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: Related section at bottom of file - - Target should be: Database transaction/audit logging page +- ~~Line 133: `[HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md)`~~ **RESOLVED** → `../http/configuration.md` - Line 193: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - Context: Related section — full config reference - Target should be: Configuration section overview page + - **Status**: PENDING (Configuration section migration) ## reference_versioned_docs/version-v4/logging/operations.md -- Line 9: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: Callout noting that audit/transaction 
log operations are documented in the database section - - Target should be: Database transaction/audit logging page - -- Line 76: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: Related section at bottom of file +- Line 9 + 76: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` + - Context: Callout + related section noting audit/transaction log operations are in the database section - Target should be: Database transaction/audit logging page + - **Status**: PENDING (Database section migration) - Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - Context: Related section — operations API overview - Target should be: Operations API section overview page + - **Status**: PENDING (Operations API section migration) diff --git a/migration-context/link-placeholders/mqtt-link-placeholders.md b/migration-context/link-placeholders/mqtt-link-placeholders.md index 63a2050c..5c4a9e3a 100644 --- a/migration-context/link-placeholders/mqtt-link-placeholders.md +++ b/migration-context/link-placeholders/mqtt-link-placeholders.md @@ -5,37 +5,28 @@ - Line 28: `[schema.graphql](TODO:reference_versioned_docs/version-v4/database/schema.md)` - Context: Explaining how to define a table that becomes an MQTT topic namespace - Target should be: Schema definition reference page (database section) + - **Status**: PENDING (Database section migration) -- Line 101: `[MQTT Configuration](TODO:reference_versioned_docs/version-v4/http/overview.md)` - - Context: "See HTTP Overview" in the Related section - - Target should be: HTTP overview (handles MQTT over WebSocket) - -- Line 103: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Related section — TLS and mTLS overview - - Target should be: Security section overview page +- ~~Line 101: `[HTTP 
Overview](TODO:reference_versioned_docs/version-v4/http/overview.md)`~~ **RESOLVED** → `../http/overview.md` +- ~~Line 103: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - Line 104: `[Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md)` - Context: Related section — defining tables/topics - Target should be: Database schema reference page + - **Status**: PENDING (Database section migration) - Line 105: `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - Context: Related section — noting REST and MQTT share the same path conventions - Target should be: REST section overview page + - **Status**: PENDING (REST section migration) ## reference_versioned_docs/version-v4/mqtt/configuration.md -- Line 20: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)` - - Context: Describing mqtt.network.securePort — links to TLS config - - Target should be: TLS configuration page (in http section) - -- Line 133: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)` - - Context: Related section link at bottom of file - - Target should be: TLS configuration page (in http section) - -- Line 134: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)` - - Context: Related section link at bottom of file - - Target should be: Security section overview page +- ~~Line 20: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)`~~ **RESOLVED** → `../http/tls.md` +- ~~Line 133: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)`~~ **RESOLVED** → `../http/tls.md` +- ~~Line 134: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - Line 135: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - Context: Related section — full 
harperdb-config.yaml reference - Target should be: Configuration section overview page + - **Status**: PENDING (Configuration section migration) diff --git a/migration-context/link-placeholders/security-link-placeholders.md b/migration-context/link-placeholders/security-link-placeholders.md index e5464816..2ba0604e 100644 --- a/migration-context/link-placeholders/security-link-placeholders.md +++ b/migration-context/link-placeholders/security-link-placeholders.md @@ -20,10 +20,12 @@ - Context: Replication mTLS configuration reference - Target should be: Replication clustering page -## reference_versioned_docs/version-v4/security/cors.md +## reference_versioned_docs/version-v4/security/configuration.md -- ~~Line 36: `[TODO:reference_versioned_docs/version-v4/http/configuration.md]`~~ **RESOLVED** → `../http/configuration.md` +- No pending TODO links. (`cors.md` and `ssl.md` were consolidated into this file; their cross-section links were resolved during initial migration.) -## reference_versioned_docs/version-v4/security/ssl.md +## reference_versioned_docs/version-v4/security/overview.md -- ~~Line 56: `[TODO:reference_versioned_docs/version-v4/http/tls.md]`~~ **RESOLVED** → `../http/tls.md` +- ~~`./users-and-roles.md`~~ **FIXED** → `../users-and-roles/overview.md` (users-and-roles is now a top-level section, not a file within security/) +- `TODO:reference_versioned_docs/version-v4/configuration/operations.md#network` — PENDING (Configuration section migration) +- `TODO:reference_versioned_docs/version-v4/configuration/operations.md#tls` — PENDING (Configuration section migration) diff --git a/migration-context/link-placeholders/studio-link-placeholders.md b/migration-context/link-placeholders/studio-link-placeholders.md index d20e68da..9f262e1b 100644 --- a/migration-context/link-placeholders/studio-link-placeholders.md +++ b/migration-context/link-placeholders/studio-link-placeholders.md @@ -5,3 +5,7 @@ - Line 20: `[configuration 
file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio "Configuration options")` - Context: Explaining how to enable local Studio via configuration - Target should be: Configuration options page localStudio section + - **Status**: PENDING (Configuration section migration) + +- ~~Line 27: `[Operations API](TODO:reference_versioned_docs/version-v4/operations/configuration.md)`~~ **FIXED path** → `TODO:reference_versioned_docs/version-v4/operations-api/overview.md` (was wrong path — `operations` → `operations-api`) + - **Status**: PENDING (Operations API section migration) diff --git a/reference_versioned_docs/version-v4/analytics/overview.md b/reference_versioned_docs/version-v4/analytics/overview.md index e2d0d8e9..6dd7a4f2 100644 --- a/reference_versioned_docs/version-v4/analytics/overview.md +++ b/reference_versioned_docs/version-v4/analytics/overview.md @@ -123,7 +123,7 @@ Example aggregate entry: ## Standard Metrics -Harper automatically tracks the following metrics for all services. Applications can also define custom metrics via [`server.recordAnalytics()`](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP API — server.recordAnalytics'). +Harper automatically tracks the following metrics for all services. Applications can also define custom metrics via [`server.recordAnalytics()`](../http/api.md#serverrecordanalyticsvalue-metric-path-method-type). ### HTTP Metrics @@ -191,17 +191,17 @@ Includes everything returned by Node.js [`process.resourceUsage()`](https://node ## Custom Metrics -Applications can record custom metrics using the `server.recordAnalytics()` API. See [HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md 'server.recordAnalytics API') for details. +Applications can record custom metrics using the `server.recordAnalytics()` API. See [HTTP API](../http/api.md) for details. 
## Analytics Configuration The `analytics.aggregatePeriod` configuration option controls how frequently aggregate summaries are written. See [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') for details. -Per-component analytics logging can be configured via `analytics.logging`. See [Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md 'analytics.logging configuration'). +Per-component analytics logging can be configured via `analytics.logging`. See [Logging Configuration](../logging/configuration.md) for details. ## Related - [Analytics Operations](./operations) -- [HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md 'server.recordAnalytics') -- [Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md 'analytics.logging') +- [HTTP API](../http/api.md) +- [Logging Configuration](../logging/configuration.md) - [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') diff --git a/reference_versioned_docs/version-v4/cli/authentication.md b/reference_versioned_docs/version-v4/cli/authentication.md index dc22a942..44a424b7 100644 --- a/reference_versioned_docs/version-v4/cli/authentication.md +++ b/reference_versioned_docs/version-v4/cli/authentication.md @@ -167,7 +167,7 @@ harper deploy target=https://prod.example.com:9925 ### 4. Use Least Privilege -Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md 'Users and roles documentation') for more information. +Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](../users-and-roles/overview.md) for more information. ### 5. 
Rotate Credentials @@ -175,7 +175,7 @@ Regularly rotate credentials, especially for automated systems and CI/CD pipelin ### 6. Audit Access -Monitor and audit CLI operations, especially for production environments. See [Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md 'Logging documentation') for more information on logging. +Monitor and audit CLI operations, especially for production environments. See [Logging](../logging/overview.md) for more information on logging. ## Troubleshooting @@ -233,5 +233,5 @@ If environment variables aren't working: - [CLI Overview](./overview.md) - General CLI information - [CLI Commands](./commands.md) - Core CLI commands - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview') - Harper security features -- [Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md 'Users and roles') - User management +- [Security Overview](../security/overview.md) - Harper security features +- [Users and Roles](../users-and-roles/overview.md) - User management diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index b3f116da..e5d787e0 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -171,7 +171,7 @@ harper search_by_value table=dog search_attribute=name search_value=harper get_a ``` :::tip -For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST API reference') and [GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md 'GraphQL querying reference'). 
+For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST API reference') and [GraphQL Querying](../graphql-querying/overview.md). ::: ### Configuration Operations @@ -231,7 +231,7 @@ harper list_roles ``` :::tip -For detailed information on users, roles, and authentication, see the [Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security reference'). +For detailed information on users, roles, and authentication, see the [Security Reference](../security/overview.md). ::: ## Remote Operations diff --git a/reference_versioned_docs/version-v4/graphql-querying/overview.md b/reference_versioned_docs/version-v4/graphql-querying/overview.md index d9100579..8e4bbf6d 100644 --- a/reference_versioned_docs/version-v4/graphql-querying/overview.md +++ b/reference_versioned_docs/version-v4/graphql-querying/overview.md @@ -14,7 +14,7 @@ Added in: v4.4.0 (provisional) Changed in: v4.5.0 (disabled by default, configuration options) -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md 'Schema definition documentation'), and for querying [Resources](./resources/overview.md). +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md 'Schema definition documentation'), and for querying [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview'). Get started by setting `graphql: true` in `config.yaml`. This configuration option was added in v4.5.0 to allow more granular control over the GraphQL endpoint. @@ -56,7 +56,7 @@ Accept: application/graphql-response+json > Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. 
-The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resources/overview.md#query) for more complex queries. +The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](TODO:reference_versioned_docs/version-v4/resources/overview.md#query 'Resource Query API') for more complex queries. Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: diff --git a/reference_versioned_docs/version-v4/http/api.md b/reference_versioned_docs/version-v4/http/api.md index 97431214..e523f331 100644 --- a/reference_versioned_docs/version-v4/http/api.md +++ b/reference_versioned_docs/version-v4/http/api.md @@ -324,7 +324,7 @@ server.recordAnalytics(value: number, metric: string, path?: string, method?: st | `method` | Optional HTTP method for grouping | | `type` | Optional type for grouping | -Metrics are aggregated and available via the [analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md 'Analytics overview'). +Metrics are aggregated and available via the [analytics API](../analytics/overview.md). 
--- diff --git a/reference_versioned_docs/version-v4/http/configuration.md b/reference_versioned_docs/version-v4/http/configuration.md index 3a6a7413..dc4e2b84 100644 --- a/reference_versioned_docs/version-v4/http/configuration.md +++ b/reference_versioned_docs/version-v4/http/configuration.md @@ -338,5 +338,5 @@ tls: - [HTTP Overview](./overview) - [HTTP API](./api) - [TLS Configuration](./tls) -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview, including TLS and mTLS') +- [Security Overview](../security/overview.md) - [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') diff --git a/reference_versioned_docs/version-v4/http/overview.md b/reference_versioned_docs/version-v4/http/overview.md index 5d9a2365..86568c16 100644 --- a/reference_versioned_docs/version-v4/http/overview.md +++ b/reference_versioned_docs/version-v4/http/overview.md @@ -40,7 +40,7 @@ The HTTP server handles multiple protocols on the same port: HTTPS support is enabled by setting `http.securePort` in `harperdb-config.yaml` and configuring the `tls` section with a certificate and private key. The same `tls` configuration is shared by HTTPS and MQTT secure connections. -See [Configuration](./configuration) for TLS options and [Security](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security overview') for certificate management details. +See [Configuration](./configuration) for TLS options and [Security](../security/overview.md) for certificate management details. ## HTTP/2 @@ -61,4 +61,4 @@ HTTP request logging is not enabled by default. 
To enable it, add an `http.loggi - [HTTP Configuration](./configuration) - [HTTP API](./api) - [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, TLS, mTLS, and authentication overview') +- [Security Overview](../security/overview.md) diff --git a/reference_versioned_docs/version-v4/logging/configuration.md b/reference_versioned_docs/version-v4/logging/configuration.md index 39d7911e..76b4396d 100644 --- a/reference_versioned_docs/version-v4/logging/configuration.md +++ b/reference_versioned_docs/version-v4/logging/configuration.md @@ -250,7 +250,7 @@ http: id: true # assign and log a unique request ID per request ``` -See [HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md 'HTTP logging configuration') for full details. +See [HTTP Configuration](../http/configuration.md) for full details. ### `mqtt.logging` diff --git a/reference_versioned_docs/version-v4/mqtt/configuration.md b/reference_versioned_docs/version-v4/mqtt/configuration.md index 51d0afac..3053344f 100644 --- a/reference_versioned_docs/version-v4/mqtt/configuration.md +++ b/reference_versioned_docs/version-v4/mqtt/configuration.md @@ -40,7 +40,7 @@ Type: `integer` Default: `8883` -The port for secure MQTT connections (MQTTS). Uses the `tls` configuration for certificates. See [TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md 'TLS configuration shared by HTTP and MQTT') for certificate setup. +The port for secure MQTT connections (MQTTS). Uses the `tls` configuration for certificates. See [TLS Configuration](../http/tls.md) for certificate setup. 
## WebSocket @@ -226,6 +226,6 @@ tls: ## Related - [MQTT Overview](./overview) -- [TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md 'TLS configuration shared by MQTT and HTTP') -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, certificates, and mTLS overview') +- [TLS Configuration](../http/tls.md) +- [Security Overview](../security/overview.md) - [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') diff --git a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md index 553f4135..60895c15 100644 --- a/reference_versioned_docs/version-v4/mqtt/overview.md +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -136,7 +136,7 @@ Available events: ## Related - [MQTT Configuration](./configuration) -- [HTTP Overview](TODO:reference_versioned_docs/version-v4/http/overview.md 'HTTP server overview — handles MQTT over WebSocket') -- [Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md 'Security, TLS, and mTLS overview') +- [HTTP Overview](../http/overview.md) +- [Security Overview](../security/overview.md) - [Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Defining tables and topics with schema.graphql') - [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface — same path conventions as MQTT topics') diff --git a/reference_versioned_docs/version-v4/security/basic-authentication.md b/reference_versioned_docs/version-v4/security/basic-authentication.md index d2393fd8..45535cfd 100644 --- a/reference_versioned_docs/version-v4/security/basic-authentication.md +++ b/reference_versioned_docs/version-v4/security/basic-authentication.md @@ -7,7 +7,7 @@ title: Basic Authentication Available since: v4.1.0 -Harper supports HTTP Basic Authentication. 
In the context of an HTTP transaction, [Basic Authentication](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication#basic_authentication_scheme) is the simplest authorization scheme which transmits credentials as username/password pairs encoded using base64. Importantly, this scheme does not encrypt credentials. If used over an insecure connection, such as HTTP, they are susceptible to being compromised. Only ever use Basic Authentication over secured connections, such as HTTPS. Even then, its better to upgrade to an encryption based authentication scheme or certificates. See [SSL / HTTPS](./ssl.md) for more information. +Harper supports HTTP Basic Authentication. In the context of an HTTP transaction, [Basic Authentication](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication#basic_authentication_scheme) is the simplest authorization scheme which transmits credentials as username/password pairs encoded using base64. Importantly, this scheme does not encrypt credentials. If used over an insecure connection, such as HTTP, they are susceptible to being compromised. Only ever use Basic Authentication over secured connections, such as HTTPS. Even then, it's better to upgrade to an encryption based authentication scheme or certificates. See [HTTP / TLS](../http/tls.md) for more information. ## How It Works diff --git a/reference_versioned_docs/version-v4/security/mtls-authentication.md b/reference_versioned_docs/version-v4/security/mtls-authentication.md index 82f5d22e..e194714d 100644 --- a/reference_versioned_docs/version-v4/security/mtls-authentication.md +++ b/reference_versioned_docs/version-v4/security/mtls-authentication.md @@ -63,7 +63,7 @@ Certificate revocation checking is **disabled by default** and must be explicitl ## User Identity -The username for the mTLS-authenticated request is derived from the `CN` field of the client certificate's subject. Ensure the CN value matches an existing Harper user account. 
See [Users and Roles](./users-and-roles.md) for managing user accounts. +The username for the mTLS-authenticated request is derived from the `CN` field of the client certificate's subject. Ensure the CN value matches an existing Harper user account. See [Users and Roles](../users-and-roles/overview.md) for managing user accounts. ## Setup Requirements diff --git a/reference_versioned_docs/version-v4/security/overview.md b/reference_versioned_docs/version-v4/security/overview.md index 1f3bb539..a47ed4a6 100644 --- a/reference_versioned_docs/version-v4/security/overview.md +++ b/reference_versioned_docs/version-v4/security/overview.md @@ -16,7 +16,7 @@ Harper's security model has two distinct layers: **Authorization** determines _what_ the caller can do. Each Harper user is assigned a role. Roles carry a permissions set that grants or denies CRUD access at the table and attribute level, in addition to controlling access to system operations. -For details on how roles and permissions work, see [Users and Roles](./users-and-roles.md). +For details on how roles and permissions work, see [Users and Roles](../users-and-roles/overview.md). ## Authentication Methods @@ -35,11 +35,11 @@ Harper supports three authentication methods: - CORS — Cross-Origin Resource Sharing. - For HTTP server configuration see [HTTP / Configuration / CORS](../http/configuration.md#cors) - - For Operations API configuration see [Operations API / Configuration / Network](TODO: ../operations-api/configuration.md#network) + - For Operations API configuration see [Operations API / Configuration / Network](TODO:reference_versioned_docs/version-v4/configuration/operations.md#network) - SSL & HTTPS — Enabling HTTPS and configuring TLS for the HTTP server. 
- For HTTP server configuration see [HTTP / Configuration / TLS](../http/tls.md) - - For Operations API configuration see [Operations API / Configuration / TLS](TODO: ../operations-api/configuration.md#tls) -- [Users and Roles](./users-and-roles.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. + - For Operations API configuration see [Operations API / Configuration / TLS](TODO:reference_versioned_docs/version-v4/configuration/operations.md#tls) +- [Users and Roles](../users-and-roles/overview.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. ## Default Behavior diff --git a/reference_versioned_docs/version-v4/studio/overview.md b/reference_versioned_docs/version-v4/studio/overview.md index c2321d6c..c904413d 100644 --- a/reference_versioned_docs/version-v4/studio/overview.md +++ b/reference_versioned_docs/version-v4/studio/overview.md @@ -24,7 +24,7 @@ localStudio: enabled: true ``` -The local studio is provided by the [Operations API](TODO:reference_versioned_docs/version-v4/operations/configuration.md) and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. +The local studio is provided by the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. 
## Accessing Local Studio From 91181d8e15b8c708acd65e02a1c3a49d2f45e6a4 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 17 Mar 2026 14:43:46 -0600 Subject: [PATCH 22/51] clean up version annotations --- reference_versioned_docs/version-v4/cli/commands.md | 4 ++-- .../version-v4/cli/operations-api-commands.md | 2 +- reference_versioned_docs/version-v4/cli/overview.md | 4 ++-- reference_versioned_docs/version-v4/mqtt/overview.md | 2 +- .../version-v4/security/certificate-management.md | 2 +- .../version-v4/security/certificate-verification.md | 4 +++- reference_versioned_docs/version-v4/security/configuration.md | 2 -- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md index 3f64de6b..2c2e9112 100644 --- a/reference_versioned_docs/version-v4/cli/commands.md +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -66,7 +66,7 @@ For more information on installation, see [Getting Started / Install and Connect ### `harper run` -Added in: v4.2.0 (confirmed via release notes) +Added in: v4.2.0 Run a Harper application from any location as a foreground, standard process (similar to `harper`). @@ -78,7 +78,7 @@ This command runs Harper with the specified application directory without automa ### `harper dev` -Added in: v4.2.0 (confirmed via release notes) +Added in: v4.2.0 Run Harper in development mode from a specified directory with automatic reloading. Recommended for local application development. Operates similar to `harper` and `harper run`. 
diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index e5d787e0..9fa7c540 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -7,7 +7,7 @@ title: Operations API Commands # Operations API Commands -Added in: v4.3.0 (confirmed via release notes) +Added in: v4.3.0 The Harper CLI supports executing operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. This enables powerful automation and scripting capabilities. diff --git a/reference_versioned_docs/version-v4/cli/overview.md b/reference_versioned_docs/version-v4/cli/overview.md index 079322f6..467cdc67 100644 --- a/reference_versioned_docs/version-v4/cli/overview.md +++ b/reference_versioned_docs/version-v4/cli/overview.md @@ -113,7 +113,7 @@ See [CLI Commands](./commands.md) for detailed documentation on each command. ## Operations API Commands -Added in: v4.3.0 (confirmed via release notes) +Added in: v4.3.0 The Harper CLI supports executing most operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. This includes operations that do not require complex nested parameters. 
@@ -174,7 +174,7 @@ harper describe_database database=dev target=https://server.com:9925 username=HD ## Development Mode -Added in: v4.2.0 (confirmed via release notes) +Added in: v4.2.0 For local application and component development, use `harper dev`: diff --git a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md index 60895c15..bb9d7c55 100644 --- a/reference_versioned_docs/version-v4/mqtt/overview.md +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -53,7 +53,7 @@ Harper supports multi-level topics for both publishing and subscribing: ### Last Will -Added in: v4.3.0 (inferred from version comparison, needs verification) +Added in: v4.3.0 Harper supports the MQTT Last Will and Testament feature. If a client disconnects unexpectedly, the broker publishes the configured will message on its behalf. diff --git a/reference_versioned_docs/version-v4/security/certificate-management.md b/reference_versioned_docs/version-v4/security/certificate-management.md index 3f76e644..7beb1585 100644 --- a/reference_versioned_docs/version-v4/security/certificate-management.md +++ b/reference_versioned_docs/version-v4/security/certificate-management.md @@ -122,7 +122,7 @@ For full configuration options and troubleshooting, see [Certificate Verificatio ## Dynamic Certificate Management -Added in: v4.4.0 (confirmed via release notes) +Added in: v4.4.0 Certificates — including CAs and private keys — can be dynamically managed without restarting Harper. 
diff --git a/reference_versioned_docs/version-v4/security/certificate-verification.md b/reference_versioned_docs/version-v4/security/certificate-verification.md index 8a6aa673..7a719585 100644 --- a/reference_versioned_docs/version-v4/security/certificate-verification.md +++ b/reference_versioned_docs/version-v4/security/certificate-verification.md @@ -7,7 +7,9 @@ title: Certificate Verification -Added in: v4.7.0 (OCSP support confirmed via release notes; certificate revocation support added in v4.5.0) +Added in: v4.5.0 + +Changed in: v4.7.0 (OCSP support added) Certificate verification (also called certificate revocation checking) ensures that revoked certificates cannot be used for mTLS authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date — due to compromise, employee departure, or other security concerns. diff --git a/reference_versioned_docs/version-v4/security/configuration.md b/reference_versioned_docs/version-v4/security/configuration.md index 758391e9..717bc180 100644 --- a/reference_versioned_docs/version-v4/security/configuration.md +++ b/reference_versioned_docs/version-v4/security/configuration.md @@ -62,8 +62,6 @@ Password hashing algorithm used when storing user passwords. Replaced the previo - **`sha256`** — Default. Good security and excellent performance. - **`argon2id`** — Highest security. More CPU-intensive; recommended for environments that do not require frequent password verifications. 
-
-
 ## Related

 - [JWT Authentication](./jwt-authentication.md)

From ac8b9c90fb32e48a2e3eec05e86831d9cb3e0ebe Mon Sep 17 00:00:00 2001
From: Ethan Arrowood
Date: Wed, 18 Mar 2026 16:43:37 -0600
Subject: [PATCH 23/51] REST Section Migration (#457)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds all 6 pages of the REST reference section:

- overview.md: HTTP methods (GET/PUT/POST/PATCH/DELETE), URL structure, ETag caching, OpenAPI endpoint
- querying.md: Full URL query syntax — FIQL operators, unions, grouping, select/limit/sort functions, relationships/joins, null queries, type conversion
- headers.md: Request/response headers (ETag, If-None-Match, Content-Type, Accept, etc.)
- content-types.md: JSON, CBOR, MessagePack, CSV — with encoding recommendations
- websockets.md: WebSocket connections, custom connect() handler, MQTT-over-WS, message ordering
- server-sent-events.md: SSE connections, one-directional streaming, connect() usage

Also adds rest-link-placeholders.md and marks REST section Complete in migration map.
Co-authored-by: Claude Sonnet 4.6 --- .../rest-link-placeholders.md | 39 +++ .../version-v4/rest/content-types.md | 100 +++++++ .../version-v4/rest/headers.md | 97 +++++++ .../version-v4/rest/overview.md | 159 +++++++++++ .../version-v4/rest/querying.md | 261 ++++++++++++++++++ .../version-v4/rest/server-sent-events.md | 64 +++++ .../version-v4/rest/websockets.md | 106 +++++++ .../version-v4-sidebars.json | 38 +++ v4-docs-migration-map.md | 13 +- 9 files changed, 870 insertions(+), 7 deletions(-) create mode 100644 migration-context/link-placeholders/rest-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/rest/content-types.md create mode 100644 reference_versioned_docs/version-v4/rest/headers.md create mode 100644 reference_versioned_docs/version-v4/rest/overview.md create mode 100644 reference_versioned_docs/version-v4/rest/querying.md create mode 100644 reference_versioned_docs/version-v4/rest/server-sent-events.md create mode 100644 reference_versioned_docs/version-v4/rest/websockets.md diff --git a/migration-context/link-placeholders/rest-link-placeholders.md b/migration-context/link-placeholders/rest-link-placeholders.md new file mode 100644 index 00000000..9d02b15b --- /dev/null +++ b/migration-context/link-placeholders/rest-link-placeholders.md @@ -0,0 +1,39 @@ +# Link Placeholders for REST Section + +## reference_versioned_docs/version-v4/rest/overview.md + +- Line (See Also): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition')` + - Context: Intro and See Also section — how to define and export resources + - Target should be: Database / Schema page + - **Status**: PENDING (Database section migration) + +## reference_versioned_docs/version-v4/rest/querying.md + +- Line (directURLMapping section): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema and resource configuration')` + - Context: directURLMapping option reference + - Target should be: 
Database / Schema page + - **Status**: PENDING (Database section migration) + +- Line (See Also): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition')` + - Context: See Also section + - Target should be: Database / Schema page + - **Status**: PENDING (Database section migration) + +## reference_versioned_docs/version-v4/rest/websockets.md + +- Line (Custom connect() Handler): `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference')` + - Context: Inline link for more on implementing custom resources + - Target should be: Resources / Resource API page + - **Status**: PENDING (Resources section migration) + +- Line (See Also): `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview')` + - Context: Link to custom resource API including `connect()` method + - Target should be: Resources section overview page + - **Status**: PENDING (Resources section migration) + +## reference_versioned_docs/version-v4/rest/server-sent-events.md + +- Line (See Also): `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview')` + - Context: Link to custom resource API including `connect()` method + - Target should be: Resources section overview page + - **Status**: PENDING (Resources section migration) diff --git a/reference_versioned_docs/version-v4/rest/content-types.md b/reference_versioned_docs/version-v4/rest/content-types.md new file mode 100644 index 00000000..bcd2217f --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/content-types.md @@ -0,0 +1,100 @@ +--- +title: Content Types +--- + + + + +# Content Types + +Harper supports multiple content types (MIME types) for both HTTP request bodies and response bodies. Harper follows HTTP standards: use the `Content-Type` request header to specify the encoding of the request body, and use the `Accept` header to request a specific response format. 
+ +```http +Content-Type: application/cbor +Accept: application/cbor +``` + +All content types work with any standard Harper operation. + +## Supported Formats + +### JSON — `application/json` + +JSON is the most widely used format, readable and easy to work with. It is well-supported across all HTTP tooling. + +**Limitations**: JSON does not natively support all Harper data types — binary data, `Date`, `Map`, and `Set` values require special handling. JSON also produces larger payloads than binary formats. + +**When to use**: Web development, debugging, interoperability with third-party clients, or when the standard JSON type set is sufficient. Pairing JSON with compression (`Accept-Encoding: gzip, br`) often yields compact network transfers due to favorable Huffman coding characteristics. + +### CBOR — `application/cbor` + +CBOR is the recommended format for most production use cases. It is a highly efficient binary format with native support for the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. + +**Advantages**: Very compact encoding, fast serialization, native streaming support (indefinite-length arrays for optimal time-to-first-byte on query results). Well-standardized with growing ecosystem support. + +**When to use**: Production APIs, performance-sensitive applications, or any scenario requiring rich data types. + +### MessagePack — `application/x-msgpack` + +MessagePack is another efficient binary format similar to CBOR, with broader adoption in some ecosystems. It supports all Harper data types. + +**Limitations**: MessagePack does not natively support streaming arrays, so query results are returned as a concatenated sequence of MessagePack objects. Decoders must be prepared to handle a sequence of values rather than a single document. + +**When to use**: Systems with existing MessagePack support that don't have CBOR available, or when interoperability with MessagePack clients is required. 
CBOR is generally preferred when both are available. + +### CSV — `text/csv` + +Comma-separated values format, suitable for data export and spreadsheet import/export. CSV lacks hierarchical structure and explicit typing. + +**When to use**: Ad-hoc data export, spreadsheet workflows, batch data processing. Not recommended for frequent or production API use. + +## Content Type via URL Extension + +As an alternative to the `Accept` header, responses can be requested in a specific format using file-style URL extensions: + +```http +GET /product/some-id.csv +GET /product/.msgpack?category=software +``` + +Using the `Accept` header is the recommended approach for clean, standard HTTP interactions. + +## Custom Content Types + +Harper's content type system is extensible. Custom handlers for any serialization format (XML, YAML, proprietary formats, etc.) can be registered in the [`contentTypes`](../resources/global-apis.md) global Map. + +## Storing Arbitrary Content Types + +When a `PUT` or `POST` is made with a non-standard content type (e.g., `text/calendar`, `image/gif`), Harper stores the content as a record with `contentType` and `data` properties: + +```http +PUT /my-resource/33 +Content-Type: text/calendar + +BEGIN:VCALENDAR +VERSION:2.0 +... +``` + +This stores a record equivalent to: + +```json +{ "contentType": "text/calendar", "data": "BEGIN:VCALENDAR\nVERSION:2.0\n..." } +``` + +Retrieving a record that has `contentType` and `data` properties returns the response with the specified `Content-Type` and body. If the content type is not from the `text` family, the data is treated as binary (a Node.js `Buffer`). + +Use `application/octet-stream` for binary data or for uploading to a specific property: + +```http +PUT /my-resource/33/image +Content-Type: image/gif + +...image data... 
+``` + +## See Also + +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Headers](./headers.md) — Content negotiation headers +- [Querying](./querying.md) — URL query syntax diff --git a/reference_versioned_docs/version-v4/rest/headers.md b/reference_versioned_docs/version-v4/rest/headers.md new file mode 100644 index 00000000..818398eb --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/headers.md @@ -0,0 +1,97 @@ +--- +title: REST Headers +--- + + + + +# REST Headers + +Harper's REST interface uses standard HTTP headers for content negotiation, caching, and performance instrumentation. + +## Response Headers + +These headers are included in all Harper REST API responses: + +| Header | Example Value | Description | +| --------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `server-timing` | `db;dur=7.165` | Duration of the operation in milliseconds. Follows the [Server-Timing](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Server-Timing) standard and can be consumed by network monitoring tools. | +| `content-type` | `application/json` | MIME type of the returned content, negotiated based on the `Accept` request header. | +| `etag` | `"abc123"` | Encoded version/last-modification time of the returned record. Used for conditional requests. | +| `location` | `/MyTable/new-id` | Returned on `POST` responses. Contains the path to the newly created record. | + +## Request Headers + +### Content-Type + +Specifies the format of the request body (for `PUT`, `PATCH`, `POST`): + +```http +Content-Type: application/json +Content-Type: application/cbor +Content-Type: application/x-msgpack +Content-Type: text/csv +``` + +See [Content Types](./content-types.md) for the full list of supported formats. 
+ +### Accept + +Specifies the preferred response format: + +```http +Accept: application/json +Accept: application/cbor +Accept: application/x-msgpack +Accept: text/csv +``` + +### If-None-Match + +Used for conditional GET requests. Provide the `ETag` value from a previous response to avoid re-fetching unchanged data: + +```http +GET /MyTable/123 +If-None-Match: "abc123" +``` + +If the record has not changed, Harper returns `304 Not Modified` with no body. This avoids serialization and network transfer overhead and works seamlessly with browser caches and external HTTP caches. + +### Accept-Encoding + +Harper supports standard HTTP compression. Including this header enables compressed responses: + +```http +Accept-Encoding: gzip, br +``` + +Compression is particularly effective for JSON responses. For binary formats like CBOR, compression provides diminishing returns compared to the already-compact encoding. + +### Authorization + +Credentials for authenticating requests. See [Security Overview](../security/overview.md) for details on supported authentication mechanisms (Basic, JWT, mTLS). + +### Sec-WebSocket-Protocol + +When connecting via WebSocket for MQTT, the sub-protocol must be set to `mqtt` as required by the MQTT specification: + +```http +Sec-WebSocket-Protocol: mqtt +``` + +## Content Type via URL Extension + +As an alternative to the `Accept` header, content types can be specified using file-style extensions in the URL path: + +```http +GET /product/some-id.csv +GET /product/.msgpack?category=software +``` + +This is not recommended for production use — prefer the `Accept` header for clean, standard HTTP interactions. 
+ +## See Also + +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Content Types](./content-types.md) — Supported encoding formats +- [Security Overview](../security/overview.md) — Authentication headers and mechanisms diff --git a/reference_versioned_docs/version-v4/rest/overview.md b/reference_versioned_docs/version-v4/rest/overview.md new file mode 100644 index 00000000..54574101 --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/overview.md @@ -0,0 +1,159 @@ +--- +title: REST Overview +--- + + + + + + +# REST Overview + +Added in: v4.2.0 + +Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation over HTTP, providing the best performance and HTTP interoperability with different clients. + +## How the REST Interface Works + +Harper's REST interface exposes database tables and custom resources as RESTful endpoints. Tables are **not** exported by default; they must be explicitly exported in a schema definition. The name of the exported resource defines the base of the endpoint path, served on the application HTTP server port (default `9926`). + +For more on defining schemas and exporting resources, see [TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition']. 
+ +## Configuration + +Enable the REST interface by adding the `rest` plugin to your application's `config.yaml`: + +```yaml +rest: true +``` + +**Options**: + +```yaml +rest: + lastModified: true # enables Last-Modified response header support + webSocket: false # disables automatic WebSocket support (enabled by default) +``` + +## URL Structure + +The REST interface follows a consistent URL structure: + +| Path | Description | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| `/my-resource` | Root path — returns a description of the resource (e.g., table metadata) | +| `/my-resource/` | Trailing slash indicates a collection — represents all records; append query parameters to search | +| `/my-resource/record-id` | A specific record identified by its primary key | +| `/my-resource/record-id/` | Trailing slash — the collection of records with the given id prefix | +| `/my-resource/record-id/with/multiple/parts` | Record id with multiple path segments | + +Changed in: v4.5.0 — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. + +## HTTP Methods + +REST operations map to HTTP methods following uniform interface principles: + +### GET + +Retrieve a record or perform a search. Handled by the resource's `get()` method. + +```http +GET /MyTable/123 +``` + +Returns the record with primary key `123`. + +```http +GET /MyTable/?name=Harper +``` + +Returns records matching `name=Harper`. See [Querying](./querying.md) for the full query syntax. + +```http +GET /MyTable/123.propertyName +``` + +Returns a single property of a record. Only works for properties declared in the schema. + +#### Conditional Requests and Caching + +GET responses include an `ETag` header encoding the record's version/last-modification time. 
Clients with a cached copy can include `If-None-Match` on subsequent requests. If the record hasn't changed, Harper returns `304 Not Modified` with no body — avoiding serialization and network transfer overhead. + +### PUT + +Create or replace a record with a specified primary key (upsert semantics). Handled by the resource's `put(record)` method. The stored record will exactly match the submitted body — any properties not included in the body are removed from the previous record. + +```http +PUT /MyTable/123 +Content-Type: application/json + +{ "name": "some data" } +``` + +Creates or replaces the record with primary key `123`. + +### POST + +Create a new record without specifying a primary key, or trigger a custom action. Handled by the resource's `post(data)` method. The auto-assigned primary key is returned in the `Location` response header. + +```http +POST /MyTable/ +Content-Type: application/json + +{ "name": "some data" } +``` + +### PATCH + +Partially update a record, merging only the provided properties (CRDT-style update). Unspecified properties are preserved. + +Added in: v4.3.0 + +```http +PATCH /MyTable/123 +Content-Type: application/json + +{ "status": "active" } +``` + +### DELETE + +Delete a specific record or all records matching a query. + +```http +DELETE /MyTable/123 +``` + +Deletes the record with primary key `123`. + +```http +DELETE /MyTable/?status=archived +``` + +Deletes all records matching `status=archived`. + +## Content Types + +Harper supports multiple content types for both request bodies and responses. Use the `Content-Type` header for request bodies and the `Accept` header to request a specific response format. + +See [Content Types](./content-types.md) for the full list of supported formats and encoding recommendations. + +## OpenAPI + +Added in: v4.3.0 + +Harper automatically generates an OpenAPI specification for all resources exported via a schema. 
This endpoint is available at: + +```http +GET /openapi +``` + +## See Also + +- [Querying](./querying.md) — Full URL query syntax, operators, and examples +- [Headers](./headers.md) — HTTP headers used by the REST interface +- [Content Types](./content-types.md) — Supported formats (JSON, CBOR, MessagePack, CSV) +- [WebSockets](./websockets.md) — Real-time connections via WebSocket +- [Server-Sent Events](./server-sent-events.md) — One-way streaming via SSE +- [HTTP Server](../http/overview.md) — Underlying HTTP server configuration +- [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — How to define and export resources diff --git a/reference_versioned_docs/version-v4/rest/querying.md b/reference_versioned_docs/version-v4/rest/querying.md new file mode 100644 index 00000000..83070f56 --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/querying.md @@ -0,0 +1,261 @@ +--- +title: REST Querying +--- + + + + + + +# REST Querying + +Harper's REST interface supports a rich URL-based query language for filtering, sorting, selecting, and limiting records. Queries are expressed as URL query parameters on collection paths. + +## Basic Attribute Filtering + +Search by attribute name and value using query parameters. The queried attribute must be indexed. + +```http +GET /Product/?category=software +``` + +Multiple attributes can be combined — only one needs to be indexed for the query to execute: + +```http +GET /Product/?category=software&inStock=true +``` + +### Null Queries + +Added in: v4.3.0 + +Query for null values or non-null values: + +```http +GET /Product/?discount=null +``` + +Note: Only indexes created in v4.3.0 or later support null indexing. Existing indexes must be rebuilt (removed and re-added) to support null queries. 
+ +## Comparison Operators (FIQL) + +Harper uses [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax for comparison operators: + +| Operator | Meaning | +| -------------------- | -------------------------------------- | +| `==` | Equal | +| `=lt=` | Less than | +| `=le=` | Less than or equal | +| `=gt=` | Greater than | +| `=ge=` | Greater than or equal | +| `=ne=`, `!=` | Not equal | +| `=ct=` | Contains (strings) | +| `=sw=`, `==*` | Starts with (strings) | +| `=ew=` | Ends with (strings) | +| `=`, `===` | Strict equality (no type conversion) | +| `!==` | Strict inequality (no type conversion) | + +**Examples**: + +```http +GET /Product/?price=gt=100 +GET /Product/?price=le=20 +GET /Product/?name==Keyboard* +GET /Product/?category=software&price=gt=100&price=lt=200 +``` + +For date fields, colons must be URL-encoded as `%3A`: + +```http +GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z +``` + +### Chained Conditions (Range) + +Omit the attribute name on the second condition to chain it against the same attribute: + +```http +GET /Product/?price=gt=100<=200 +``` + +Chaining supports `gt`/`ge` combined with `lt`/`le` for range queries. No other chaining combinations are currently supported. 
+ +### Type Conversion + +For FIQL comparators (`==`, `!=`, `=gt=`, etc.), Harper applies automatic type conversion: + +| Syntax | Behavior | +| ----------------------------------------- | ------------------------------------------- | +| `name==null` | Converts to `null` | +| `name==123` | Converts to number if attribute is untyped | +| `name==true` | Converts to boolean if attribute is untyped | +| `name==number:123` | Explicit number conversion | +| `name==boolean:true` | Explicit boolean conversion | +| `name==string:some%20text` | Keep as string with URL decode | +| `name==date:2024-01-05T20%3A07%3A27.955Z` | Explicit Date conversion | + +If the attribute specifies a type in the schema (e.g., `Float`), values are always converted to that type before searching. + +For strict operators (`=`, `===`, `!==`), no automatic type conversion is applied — the value is decoded as a URL-encoded string, and the attribute type (if declared in the schema) dictates type conversion. + +## Unions (OR Logic) + +Use `|` instead of `&` to combine conditions with OR logic: + +```http +GET /Product/?rating=5|featured=true +``` + +## Grouping + +Use parentheses or square brackets to control order of operations: + +```http +GET /Product/?rating=5|(price=gt=100&price=lt=200) +``` + +Square brackets are recommended when constructing queries from user input because standard URI encoding safely encodes `[` and `]` (but not `(`): + +```http +GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient] +``` + +Constructing from JavaScript: + +```javascript +let url = `/Product/?rating=5&[${tags.map(encodeURIComponent).join('|')}]`; +``` + +Groups can be nested for complex conditions: + +```http +GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true] +``` + +## Query Functions + +Harper supports special query functions using call syntax, included in the query string separated by `&`. 
+ +### `select(properties)` + +Specify which properties to include in the response. + +| Syntax | Returns | +| -------------------------------------- | ------------------------------------------- | +| `?select(property)` | Values of a single property directly | +| `?select(property1,property2)` | Objects with only the specified properties | +| `?select([property1,property2])` | Arrays of property values | +| `?select(property1,)` | Objects with a single specified property | +| `?select(property{subProp1,subProp2})` | Nested objects with specific sub-properties | + +**Examples**: + +```http +GET /Product/?category=software&select(name) +GET /Product/?brand.name=Microsoft&select(name,brand{name}) +``` + +### `limit(end)` or `limit(start,end)` + +Limit the number of results returned, with an optional starting offset. + +```http +GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) +GET /Product/?rating=gt=3&limit(10,30) +``` + +### `sort(property)` or `sort(+property,-property,...)` + +Sort results by one or more properties. Prefix `+` or no prefix = ascending; `-` = descending. Multiple properties break ties in order. + +```http +GET /Product/?rating=gt=3&sort(+name) +GET /Product/?sort(+rating,-price) +``` + +Added in: v4.3.0 + +## Relationships and Joins + +Added in: v4.3.0 + +Harper supports querying across related tables through dot-syntax chained attributes. Relationships must be defined in the schema using `@relation`. + +**Schema example**: + +```graphql +type Product @table @export { + id: ID @primaryKey + name: String + brandId: ID @indexed + brand: Brand @relation(from: "brandId") +} +type Brand @table @export { + id: ID @primaryKey + name: String + products: [Product] @relation(to: "brandId") +} +``` + +**Query by related attribute** (INNER JOIN behavior): + +```http +GET /Product/?brand.name=Microsoft +GET /Brand/?products.name=Keyboard +``` + +### Nested Select with Joins + +Relationship attributes are not included by default. 
Use `select()` to include them:
+
+```http
+GET /Product/?brand.name=Microsoft&select(name,brand)
+GET /Product/?brand.name=Microsoft&select(name,brand{name})
+GET /Product/?name=Keyboard&select(name,brand{name,id})
+```
+
+When selecting without a filter on the related table, this acts as a LEFT JOIN — the relationship property is omitted if the foreign key is null or references a non-existent record.
+
+### Many-to-Many Relationships
+
+Many-to-many relationships can be modeled with an array of foreign key values, without a junction table:
+
+```graphql
+type Product @table @export {
+	id: ID @primaryKey
+	name: String
+	resellerIds: [ID] @indexed
+	resellers: [Reseller] @relation(from: "resellerIds")
+}
+```
+
+```http
+GET /Product/?resellers.name=Cool%20Shop&select(id,name,resellers{name,id})
+```
+
+The array order of `resellerIds` is preserved when resolving the relationship.
+
+## Property Access via URL
+
+Changed in: v4.5.0
+
+Access a specific property of a record by appending it with dot syntax to the record id:
+
+```http
+GET /MyTable/123.propertyName
+```
+
+This only works for properties declared in the schema. As of v4.5.0, dots in URL paths are no longer interpreted as property access for undeclared properties, allowing URLs to generally include dots without being misinterpreted.
+
+## `directURLMapping` Option
+
+Added in: v4.5.0
+
+Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema and resource configuration') for configuration details.
+ +## See Also + +- [REST Overview](./overview.md) — HTTP methods, URL structure, and caching +- [Headers](./headers.md) — Request and response headers +- [Content Types](./content-types.md) — Encoding formats +- [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — Defining schemas, relationships, and indexes diff --git a/reference_versioned_docs/version-v4/rest/server-sent-events.md b/reference_versioned_docs/version-v4/rest/server-sent-events.md new file mode 100644 index 00000000..e7decfde --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/server-sent-events.md @@ -0,0 +1,64 @@ +--- +title: Server-Sent Events +--- + + + + +# Server-Sent Events + +Added in: v4.2.0 + +Harper supports Server-Sent Events (SSE), a simple and efficient mechanism for browser-based applications to receive real-time updates from the server over a standard HTTP connection. SSE is a one-directional transport — the server pushes events to the client, and the client has no way to send messages back on the same connection. + +## Connecting + +SSE connections are made by targeting a resource URL. By default, connecting to a resource path subscribes to changes for that resource and streams events as they occur. + +```javascript +let eventSource = new EventSource('https://server/my-resource/341', { + withCredentials: true, +}); + +eventSource.onmessage = (event) => { + let data = JSON.parse(event.data); +}; +``` + +The URL path maps to the resource in the same way as REST and WebSocket connections. Connecting to `/my-resource/341` subscribes to updates for the record with id `341` in the `my-resource` table (or custom resource). + +## `connect()` Handler + +SSE connections use the same `connect()` method as WebSockets on resource classes, with one key difference: since SSE is one-directional, `connect()` is called without an `incomingMessages` argument. 
+ +```javascript +export class MyResource extends Resource { + async *connect() { + // yield messages to send to the client + while (true) { + await someCondition(); + yield { event: 'update', data: { value: 42 } }; + } + } +} +``` + +The default `connect()` behavior subscribes to the resource and streams changes automatically. + +## When to Use SSE vs WebSockets + +| | SSE | WebSockets | +| --------------- | ------------------------------------- | -------------------------------- | +| Direction | Server → Client only | Bidirectional | +| Transport | Standard HTTP | HTTP upgrade | +| Browser support | Native `EventSource` API | Native `WebSocket` API | +| Use case | Live feeds, dashboards, notifications | Interactive real-time apps, MQTT | + +SSE is simpler to implement and has built-in reconnection in browsers. For scenarios requiring bidirectional communication, use [WebSockets](./websockets.md). + +## See Also + +- [WebSockets](./websockets.md) — Bidirectional real-time connections +- [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview') — Custom resource API including `connect()` diff --git a/reference_versioned_docs/version-v4/rest/websockets.md b/reference_versioned_docs/version-v4/rest/websockets.md new file mode 100644 index 00000000..e38925b4 --- /dev/null +++ b/reference_versioned_docs/version-v4/rest/websockets.md @@ -0,0 +1,106 @@ +--- +title: WebSockets +--- + + + + + +# WebSockets + +Added in: v4.2.0 + +Harper supports WebSocket connections through the REST interface, enabling real-time bidirectional communication with resources. WebSocket connections target a resource URL path — by default, connecting to a resource subscribes to changes for that resource. + +## Configuration + +WebSocket support is enabled automatically when the `rest` plugin is enabled. 
To disable it: + +```yaml +rest: + webSocket: false +``` + +## Connecting + +A WebSocket connection to a resource URL subscribes to that resource and streams change events: + +```javascript +let ws = new WebSocket('wss://server/my-resource/341'); +ws.onmessage = (event) => { + let data = JSON.parse(event.data); +}; +``` + +By default, `new WebSocket('wss://server/my-resource/341')` accesses the resource defined for `my-resource` with record id `341` and subscribes to it. When the record changes or a message is published to it, the WebSocket connection receives the update. + +## Custom `connect()` Handler + +WebSocket behavior is driven by the `connect(incomingMessages)` method on a resource class. The method must return an async iterable (or generator) that produces messages to send to the client. For more on implementing custom resources, see [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference'). + +**Simple echo server**: + +```javascript +export class Echo extends Resource { + async *connect(incomingMessages) { + for await (let message of incomingMessages) { + yield message; // echo each message back + } + } +} +``` + +**Using the default connect with event-style access**: + +The default `connect()` returns a convenient streaming iterable with: + +- A `send(message)` method for pushing outgoing messages +- A `close` event for cleanup on disconnect + +```javascript +export class Example extends Resource { + connect(incomingMessages) { + let outgoingMessages = super.connect(); + + let timer = setInterval(() => { + outgoingMessages.send({ greeting: 'hi again!' }); + }, 1000); + + incomingMessages.on('data', (message) => { + outgoingMessages.send(message); // echo incoming messages + }); + + outgoingMessages.on('close', () => { + clearInterval(timer); + }); + + return outgoingMessages; + } +} +``` + +## MQTT over WebSockets + +Harper also supports MQTT over WebSockets. 
The sub-protocol must be set to `mqtt` as required by the MQTT specification: + +```http +Sec-WebSocket-Protocol: mqtt +``` + +See [MQTT Overview](../mqtt/overview.md) for full MQTT documentation. + +## Message Ordering in Distributed Environments + +Harper prioritizes low-latency delivery in distributed (multi-node) environments. Messages are delivered to local subscribers immediately upon arrival — Harper does not delay messages for inter-node coordination. + +In a scenario where messages arrive out-of-order across nodes: + +- **Non-retained messages** (published without a `retain` flag): Every message is delivered to subscribers in the order received, even if out-of-order relative to other nodes. Good for use cases like chat where every message must be delivered. +- **Retained messages** (published with `retain`, or PUT/updated in the database): Only the message with the latest timestamp is kept as the "winning" record. Out-of-order older messages are not re-delivered. This ensures eventual consistency of the most recent record state across the cluster. Good for use cases like sensor readings where only the latest value matters. 
+ +## See Also + +- [Server-Sent Events](./server-sent-events.md) — One-way real-time streaming +- [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview') — Custom resource API including `connect()` diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index a7d64c80..299587d6 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -87,6 +87,44 @@ } ] }, + { + "type": "category", + "label": "REST", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "rest/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "rest/content-types", + "label": "Content Types" + }, + { + "type": "doc", + "id": "rest/headers", + "label": "Headers" + }, + { + "type": "doc", + "id": "rest/querying", + "label": "Querying" + }, + { + "type": "doc", + "id": "rest/websockets", + "label": "WebSockets" + }, + { + "type": "doc", + "id": "rest/server-sent-events", + "label": "Server Sent Events" + } + ] + }, { "type": "category", "label": "Logging", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 0f9349b3..479c7eee 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -495,7 +495,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/developers/rest.md` - **Additional Sources**: Current `reference/rest.md` -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - REST interface introduced @@ -507,7 +507,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Version Annotations**: - Null indexing/querying: 
v4.3.0 - URL path improvements: v4.5.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Iterator-based queries - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Relationships/joins, sorting, nested select, null indexing @@ -517,27 +517,26 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/headers.md` - **Additional Sources**: Current `reference/headers.md` -- **Version Annotations**: Track which headers were added/removed over versions -- **Status**: Not Started +- **Status**: Complete ### `reference/rest/content-types.md` - **Primary Source**: `versioned_docs/version-4.7/reference/content-types.md` - **Additional Sources**: Current `reference/content-types.md` -- **Status**: Not Started +- **Status**: Complete ### `reference/rest/websockets.md` - **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` - **Additional Sources**: Current `reference/real-time.md` -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - WebSocket support ### `reference/rest/server-sent-events.md` - **Primary Source**: Extract from real-time or REST docs -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Server-Sent Events support From 3508aabcf6da255b696100710d2f1e68ccea02c0 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 26 Mar 2026 17:48:28 -0600 Subject: [PATCH 24/51] Database Section Migration (#458) * docs: migrate Database section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * fixup! docs: migrate Database section to v4 consolidated reference * fixup! fixup! docs: migrate Database section to v4 consolidated reference * fixup! fixup! 
docs: migrate Database section to v4 consolidated reference * manual review edits * manual review edits * remove nats section * docs: add database/api.md for JS globals (tables, databases, transaction, createBlob) Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .../database-link-placeholders.md | 135 +++++ .../version-v4/cli/commands.md | 4 +- .../version-v4/cli/operations-api-commands.md | 2 +- .../version-v4/database/api.md | 243 +++++++++ .../version-v4/database/compaction.md | 71 +++ .../version-v4/database/data-loader.md | 216 ++++++++ .../version-v4/database/jobs.md | 272 ++++++++++ .../version-v4/database/overview.md | 123 +++++ .../version-v4/database/schema.md | 503 ++++++++++++++++++ .../version-v4/database/storage-algorithm.md | 111 ++++ .../version-v4/database/system-tables.md | 158 ++++++ .../version-v4/database/transaction.md | 154 ++++++ .../version-v4/logging/configuration.md | 4 +- .../version-v4/logging/operations.md | 4 +- .../version-v4/logging/overview.md | 4 +- .../version-v4/mqtt/overview.md | 4 +- .../version-v4/rest/overview.md | 4 +- .../version-v4/rest/querying.md | 4 +- .../version-v4-sidebars.json | 53 ++ v4-docs-implementation-plan.md | 1 + v4-docs-migration-map.md | 13 + 21 files changed, 2068 insertions(+), 15 deletions(-) create mode 100644 migration-context/link-placeholders/database-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/database/api.md create mode 100644 reference_versioned_docs/version-v4/database/compaction.md create mode 100644 reference_versioned_docs/version-v4/database/data-loader.md create mode 100644 reference_versioned_docs/version-v4/database/jobs.md create mode 100644 reference_versioned_docs/version-v4/database/overview.md create mode 100644 reference_versioned_docs/version-v4/database/schema.md create mode 100644 reference_versioned_docs/version-v4/database/storage-algorithm.md create mode 100644 
reference_versioned_docs/version-v4/database/system-tables.md create mode 100644 reference_versioned_docs/version-v4/database/transaction.md diff --git a/migration-context/link-placeholders/database-link-placeholders.md b/migration-context/link-placeholders/database-link-placeholders.md new file mode 100644 index 00000000..2d8a74f0 --- /dev/null +++ b/migration-context/link-placeholders/database-link-placeholders.md @@ -0,0 +1,135 @@ +# Link Placeholders for Database Section + +## reference_versioned_docs/version-v4/database/overview.md + +- Line ~37: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` + - Context: Mentioning custom resources as extension of the database system + - Target should be: Resource API reference page + +- Line ~55: `[REST](TODO:reference_versioned_docs/version-v4/rest/overview.md)` + - Context: Related documentation footer + - Target should be: REST overview + +- Line ~56: `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md)` + - Context: Related documentation footer + - Target should be: Resources overview + +- Line ~57: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Related documentation footer + - Target should be: Operations API overview + +- Line ~58: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` + - Context: Related documentation footer + - Target should be: Configuration overview + +## reference_versioned_docs/version-v4/database/schema.md + +- Line ~164: `[REST Querying](TODO:reference_versioned_docs/version-v4/rest/querying.md)` + - Context: How to query tables via HTTP using schema-defined relationships + - Target should be: REST querying reference + +- Line ~165: `[Resources](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` + - Context: Extending table behavior with custom resource logic + - Target should be: Resource API reference + +- Line ~167: 
`[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` + - Context: graphqlSchema component and storage configuration + - Target should be: Configuration options page + +- Line ~141 (Dynamic Schema section): `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` + - Context: NoSQL create_attribute/drop_attribute operations + - Target should be: Operations list page + +## reference_versioned_docs/version-v4/database/data-loader.md + +- Line ~13: `[Extension](TODO:reference_versioned_docs/version-v4/components/extension-api.md)` + - Context: dataLoader is an Extension component + - Target should be: Extension API reference + +- Line ~73: `[Components](TODO:reference_versioned_docs/version-v4/components/overview.md)` + - Context: Related documentation footer + - Target should be: Components overview + +## reference_versioned_docs/version-v4/database/storage-algorithm.md + +- Line ~45: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` + - Context: Storage configuration options (compression settings) + - Target should be: Configuration options page (storage section) + +## reference_versioned_docs/version-v4/database/jobs.md + +- Line ~128: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Related documentation footer + - Target should be: Operations API overview + +## reference_versioned_docs/version-v4/database/system-tables.md + +- Line ~82: `[Analytics](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` + - Context: Full analytics metrics reference in related docs footer + - Target should be: Analytics overview + +- Line ~95: `[Replication](TODO:reference_versioned_docs/version-v4/replication/clustering.md)` + - Context: hdb_nodes used by clustering operations + - Target should be: Clustering reference + +- Line ~104: `[Analytics](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` (second 
reference) + - Context: Related documentation footer + - Target should be: Analytics overview + +- Line ~105: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` + - Context: Related documentation footer + - Target should be: Replication overview + +- Line ~106: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Querying system tables + - Target should be: Operations API overview + +## reference_versioned_docs/version-v4/database/compaction.md + +- Line ~38: `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md)` + - Context: copy-db CLI command + - Target should be: CLI commands reference + +- Line ~56: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` + - Context: Storage configuration options + - Target should be: Configuration options page (storage section) + +## reference_versioned_docs/version-v4/database/api.md + +- Line ~20: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` + - Context: Noting that table classes implement the Resource API + - Target should be: Resource API reference page + +- Line ~48: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` (second reference) + - Context: Pointing to full table method reference + - Target should be: Resource API reference page + +- Line ~186: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` (Related Documentation) + - Context: Related docs footer + - Target should be: Resource API reference page + +- Line ~188: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` + - Context: Blob storage path configuration + - Target should be: Configuration options page (storage section) + +## reference_versioned_docs/version-v4/database/transaction.md + +- Line ~73: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` + - Context: 
Clustering must be set up for transaction logs + - Target should be: Replication overview + +- Line ~148: `[Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md)` + - Context: Distinction between app logging and transaction/audit logging + - Target should be: Logging overview + +- Line ~149: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` (second reference) + - Context: Related documentation footer + - Target should be: Replication overview + +- Line ~150: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` + - Context: logging.auditLog global configuration + - Target should be: Configuration options page + +- Line ~151: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` + - Context: Related documentation footer + - Target should be: Operations API overview diff --git a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md index 2c2e9112..7143f0a2 100644 --- a/reference_versioned_docs/version-v4/cli/commands.md +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -230,7 +230,7 @@ This copies the default `data` database to a new location with compaction applie - Creating compacted backups - Reclaiming free space -See also: [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md 'Database compaction reference') for more information. +See also: [Database Compaction](../database/compaction.md) for more information. #### How Backups Work @@ -266,4 +266,4 @@ The CLI supports executing commands on remote Harper instances. 
For details, see - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI - [CLI Authentication](./authentication.md) - Authentication mechanisms - [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration') - Configuration parameters for installation -- [Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md 'Compaction') - More on database compaction +- [Database Compaction](../database/compaction.md) - More on database compaction diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index 9fa7c540..98bf0446 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -153,7 +153,7 @@ last_updated_record: 1724483231970.9949 ``` :::tip -For detailed information on database and table structures, see the [Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md 'Database reference documentation'). +For detailed information on database and table structures, see the [Database Reference](../database/overview.md). ::: ### Data Operations diff --git a/reference_versioned_docs/version-v4/database/api.md b/reference_versioned_docs/version-v4/database/api.md new file mode 100644 index 00000000..b3b9408a --- /dev/null +++ b/reference_versioned_docs/version-v4/database/api.md @@ -0,0 +1,243 @@ +--- +title: API +--- + + + + + +# API + +Harper exposes a set of global variables and functions that JavaScript code (in components, applications, and plugins) can use to interact with the database system. + +## `tables` + +`tables` is an object whose properties are the tables in the default database (`data`). 
Each table defined in your `schema.graphql` file is available as a property, and the value is the table class that implements the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md). + +```graphql +# schema.graphql +type Product @table { + id: ID @primaryKey + name: String + price: Float +} +``` + +```javascript +const { Product } = tables; +// same as: databases.data.Product +``` + +### Example + +```javascript +// Create a new record (id auto-generated) +const created = await Product.create({ name: 'Shirt', price: 9.5 }); + +// Modify the record +await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); + +// Retrieve by primary key +const record = await Product.get(created.id); + +// Query with conditions +const query = { + conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], +}; +for await (const record of Product.search(query)) { + // ... +} +``` + +For the full set of methods available on table classes, see the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md). + +## `databases` + +`databases` is an object whose properties are Harper databases. Each database contains its tables as properties, the same way `tables` does for the default database. In fact, `databases.data === tables` is always true. + +Use `databases` when you need to access tables from a non-default database. 
+ +### Example + +```javascript +const { Product } = databases.data; // default database +const Events = databases.analytics.Events; // another database + +// Create an event record +const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); + +// Query events +for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { + // handle each event +} +``` + +To define tables in a non-default database, use the `database` argument on the `@table` directive in your schema: + +```graphql +type Events @table(database: "analytics") { + id: ID @primaryKey + eventType: String @indexed +} +``` + +See [Schema](./schema.md) for full schema definition syntax. + +## `transaction(context?, callback)` + +`transaction` executes a callback within a database transaction and returns a promise that resolves when the transaction commits. The callback may be async. + +```typescript +transaction(context?: object, callback: (txn: Transaction) => any | Promise): Promise +``` + +For most operations — HTTP request handlers, for example — Harper automatically starts a transaction. Use `transaction()` explicitly when your code runs outside of a natural transaction context, such as in timers or background jobs. + +### Basic Usage + +```javascript +import { tables } from 'harperdb'; +const { MyTable } = tables; + +if (isMainThread) { + setInterval(async () => { + let data = await (await fetch('https://example.com/data')).json(); + transaction(async (txn) => { + for (let item of data) { + await MyTable.put(item, txn); + } + }); + }, 3600000); // every hour +} +``` + +### Nesting + +If `transaction()` is called with a context that already has an active transaction, it reuses that transaction, executes the callback immediately, and returns. This makes `transaction()` safe to call defensively to ensure a transaction is always active. 
+ +### Transaction Object + +The callback receives a `txn` object with the following members: + +| Member | Type | Description | +| --------------------- | --------------- | ------------------------------------------------------ | +| `commit()` | `() => Promise` | Commits the current transaction | +| `abort()` | `() => void` | Aborts the transaction and resets it | +| `resetReadSnapshot()` | `() => void` | Resets the read snapshot to the latest committed state | +| `timestamp` | `number` | Timestamp associated with the current transaction | + +On normal callback completion the transaction is committed automatically. If the callback throws, the transaction is aborted. + +### Transaction Scope and Atomicity + +Transactions span a single database. All tables within the same database share a single transactional context: reads return a consistent snapshot, and writes across multiple tables are committed atomically. If code accesses tables in different databases, each database gets its own transaction with no cross-database atomicity guarantee. + +For deeper background on Harper's transaction model, see [Storage Algorithm](./storage-algorithm.md). + +## `createBlob(data, options?)` + +Added in: v4.5.0 + +`createBlob` creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, large HTML, etc.) in a `Blob`-typed schema field. + +```typescript +createBlob(data: Buffer | Uint8Array | ReadableStream | string, options?: BlobOptions): Blob +``` + +Harper's `Blob` extends the [Web API `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so standard methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`, `.bytes()`) are all available. Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to fit in memory. 
+ +### Basic Usage + +Declare a blob field in your schema (see [Schema — Blob Type](./schema.md#blob-type)): + +```graphql +type MyTable @table { + id: Any! @primaryKey + data: Blob +} +``` + +Create and store a blob: + +```javascript +let blob = createBlob(largeBuffer); +await MyTable.put({ id: 'my-record', data: blob }); +``` + +Retrieve blob data using standard `Blob` methods: + +```javascript +let record = await MyTable.get('my-record'); +let buffer = await record.data.bytes(); // ArrayBuffer +let text = await record.data.text(); // string +let stream = record.data.stream(); // ReadableStream +``` + +### Streaming + +`createBlob` supports streaming data in as data is streamed out — useful for large media where low-latency transmission from origin is critical: + +```javascript +let blob = createBlob(incomingStream); +// blob exists, but data is still streaming to storage +await MyTable.put({ id: 'my-record', data: blob }); + +let record = await MyTable.get('my-record'); +// blob data is accessible as it arrives +let outgoingStream = record.data.stream(); +``` + +Because blobs can be referenced before they are fully written, they are **not** ACID-compliant by default. Use `saveBeforeCommit: true` to wait for the full write before committing: + +```javascript +let blob = createBlob(stream, { saveBeforeCommit: true }); +await MyTable.put({ id: 'my-record', data: blob }); +// put() resolves only after blob is fully written and record is committed +``` + +### `BlobOptions` + +| Option | Type | Default | Description | +| ------------------ | --------- | ------- | ----------------------------------------------------------------------- | +| `saveBeforeCommit` | `boolean` | `false` | Wait for the blob to be fully written before committing the transaction | + +### Error Handling + +Blobs written from a stream can fail mid-stream after the record is committed. 
Register an error handler to respond to interrupted writes: + +```javascript +export class MyEndpoint extends MyTable { + async get(target) { + const record = await super.get(target); + let blob = record.data; + blob.on('error', () => { + MyTable.invalidate(target); + }); + return { status: 200, headers: {}, body: blob }; + } +} +``` + +### `size` Property + +Blobs created from a stream may not have `size` available immediately. Listen for the `size` event if you need it: + +```javascript +let blob = record.data; +if (blob.size === undefined) { + blob.on('size', (size) => { + // called once size is determined + }); +} +``` + +### Blob Coercion + +When a field is typed as `Blob` in the schema, any string or buffer assigned via `put`, `patch`, or `publish` is automatically coerced to a `Blob`. This means plain JSON HTTP bodies and MQTT messages work without manual `createBlob()` calls in most cases. + +## Related Documentation + +- [Schema](./schema.md) — Defining tables and blob fields +- [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md) — Full table class method reference +- [Transaction Logging](./transaction.md) — Audit log and transaction log for data change history +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md) — Blob storage path configuration diff --git a/reference_versioned_docs/version-v4/database/compaction.md b/reference_versioned_docs/version-v4/database/compaction.md new file mode 100644 index 00000000..152a9ab4 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/compaction.md @@ -0,0 +1,71 @@ +--- +title: Compaction +--- + + + + +# Compaction + +Added in: v4.3.0 + +Database files grow over time as records are inserted, updated, and deleted. Deleted records and updated values leave behind free space (fragmentation) in the database file, which can increase file size and potentially affect performance. 
Compaction eliminates this free space, creating a smaller, contiguous database file. + +> **Note:** Compaction does not compress your data. It removes internal fragmentation to make the file smaller. To enable compression on a database, use compaction to copy the database with updated storage configuration applied. + +Compaction is also the mechanism to apply storage configuration changes (such as enabling compression) to existing databases, since some storage settings cannot be changed in-place. + +## Copy Compaction + +Creates a compacted copy of a database file. The original database is left unchanged. + +> **Recommendation:** Stop Harper before performing copy compaction to prevent any record loss during the copy operation. + +Run using the [CLI](../cli/commands.md): + +```bash +harperdb copy-db +``` + +The `source-database` is the database name (not a file path). The target is the full file path where the compacted copy will be written. + +To replace the original database with the compacted copy, move or rename the output file to the original database path after Harper is stopped. + +**Example — compact the default `data` database:** + +```bash +harperdb copy-db data /home/user/hdb/database/copy.mdb +``` + +## Compact on Start + +Automatically compacts all non-system databases when Harper starts. Harper will not start until compaction is complete. Under the hood, it loops through all user databases, creates a backup of each, compacts it, replaces the original with the compacted copy, and removes the backup. 
+ +Configure in `harperdb-config.yaml`: + +```yaml +storage: + compactOnStart: true + compactOnStartKeepBackup: false +``` + +Using CLI environment variables: + +```bash +STORAGE_COMPACTONSTART=true STORAGE_COMPACTONSTARTKEEPBACKUP=true harperdb +``` + +### Options + +| Option | Type | Default | Description | +| -------------------------- | ------- | ------- | ------------------------------------------------------------------------------- | +| `compactOnStart` | Boolean | `false` | Compact all databases at startup. Automatically reset to `false` after running. | +| `compactOnStartKeepBackup` | Boolean | `false` | Retain the backup copy created during compact on start | + +> **Note:** `compactOnStart` is automatically set back to `false` after it runs, so compaction only happens on the next start if you explicitly re-enable it. + +## Related Documentation + +- [Storage Algorithm](./storage-algorithm.md) — How Harper stores data using LMDB +- [CLI Commands](../cli/commands.md) — `copy-db` CLI command reference +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage section') — Full storage configuration options including compression settings diff --git a/reference_versioned_docs/version-v4/database/data-loader.md b/reference_versioned_docs/version-v4/database/data-loader.md new file mode 100644 index 00000000..69745753 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/data-loader.md @@ -0,0 +1,216 @@ +--- +title: Data Loader +--- + + + + +# Data Loader + +Added in: v4.6.0 + +The Data Loader is a built-in component that loads data from JSON or YAML files into Harper tables as part of component deployment. It is designed for seeding tables with initial records — configuration data, reference data, default users, or other records that should exist when a component is first deployed or updated. 
+ +## Configuration + +In your component's `config.yaml`, use the `dataLoader` key to specify the data files to load: + +```yaml +dataLoader: + files: 'data/*.json' +``` + +`dataLoader` is an [Extension](TODO:reference_versioned_docs/version-v4/components/extension-api.md 'Extension component API') and supports the standard `files` configuration option, including glob patterns. + +## Data File Format + +Each data file loads records into a single table. The file specifies the target database, table, and an array of records. + +### JSON Example + +```json +{ + "database": "myapp", + "table": "users", + "records": [ + { + "id": 1, + "username": "admin", + "email": "admin@example.com", + "role": "administrator" + }, + { + "id": 2, + "username": "user1", + "email": "user1@example.com", + "role": "standard" + } + ] +} +``` + +### YAML Example + +```yaml +database: myapp +table: settings +records: + - id: 1 + setting_name: app_name + setting_value: My Application + - id: 2 + setting_name: version + setting_value: '1.0.0' +``` + +One table per file. To load data into multiple tables, create a separate file for each table. + +## File Patterns + +The `files` option accepts a single path, a list of paths, or a glob pattern: + +```yaml +# Single file +dataLoader: + files: 'data/seed-data.json' + +# Multiple specific files +dataLoader: + files: + - 'data/users.json' + - 'data/settings.yaml' + - 'data/initial-products.json' + +# Glob pattern +dataLoader: + files: 'data/**/*.{json,yaml,yml}' +``` + +## Loading Behavior + +The Data Loader runs on every full system start and every component deployment — this includes fresh installs, restarts of the Harper process, and redeployments of the component. It does **not** re-run on individual thread restarts within a running Harper process. + +Because the Data Loader runs on every startup and deployment, change detection is central to how it works safely. On each run: + +1. All specified data files are read (JSON or YAML) +2. 
Each file is validated to reference a single table +3. Records are inserted or updated based on content hash comparison: + - New records are inserted if they don't exist + - Existing records are updated only if the data file content has changed + - Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten + - Records modified by users after being loaded are preserved and not overwritten + - Extra fields added by users to data-loaded records are preserved during updates +4. SHA-256 content hashes are stored in the [`hdb_dataloader_hash`](./system-tables.md#hdb_dataloader_hash) system table to track which records have been loaded and detect changes + +### Change Detection + +| Scenario | Behavior | +| -------------------------------------------------- | ------------------------------------------------ | +| New record | Inserted; content hash stored | +| Unchanged record | Skipped (no writes) | +| Changed data file | Updated via `patch`, preserving any extra fields | +| Record created by user (not data loader) | Never overwritten | +| Record modified by user after load | Preserved, not overwritten | +| Extra fields added by user to a data-loaded record | Preserved during updates | + +This design makes data files safe to redeploy repeatedly — across deployments, node scaling, and system restarts — without losing manual modifications or causing unnecessary writes. + +## Best Practices + +**Define schemas first.** While the Data Loader can infer schemas from the records it loads, it is strongly recommended to define table schemas explicitly using the [graphqlSchema component](./schema.md) before loading data. This ensures proper types, constraints, and relationships. + +**One table per file.** Each data file must target a single table. Organize files accordingly. + +**Idempotent data.** Design files to be safe to load multiple times without creating duplicate or conflicting records. 
+ +**Version control.** Include data files in version control for consistency across deployments and environments. + +**Environment-specific data.** Consider using different data files for different environments (development, staging, production) to avoid loading inappropriate records. + +**Validate before deploying.** Ensure data files are valid JSON or YAML and match your table schemas before deployment to catch type mismatches early. + +**No sensitive data.** Do not include passwords, API keys, or secrets directly in data files. Use environment variables or secure configuration management instead. + +## Example Component Structure + +A common production use case is shipping reference data — lookup tables like countries and regions — as part of a component. The records are version-controlled alongside the code, consistent across every environment, and the data loader keeps them in sync on every deployment without touching any user-modified fields. + +``` +my-component/ +├── config.yaml +├── schemas.graphql +├── roles.yaml +└── data/ + ├── countries.json # ISO country codes — reference data, ships with component + └── regions.json # region/subdivision codes +``` + +**`config.yaml`**: + +```yaml +graphqlSchema: + files: 'schemas.graphql' + +roles: + files: 'roles.yaml' + +dataLoader: + files: 'data/*.json' + +rest: true +``` + +**`schemas.graphql`**: + +```graphql +type Country @table(database: "myapp") @export { + id: ID @primaryKey # ISO 3166-1 alpha-2, e.g. "US" + name: String @indexed + region: String @indexed +} + +type Region @table(database: "myapp") @export { + id: ID @primaryKey # ISO 3166-2, e.g. 
"US-CA" + name: String @indexed + countryId: ID @indexed + country: Country @relationship(from: countryId) +} +``` + +**`data/countries.json`**: + +```json +{ + "database": "myapp", + "table": "Country", + "records": [ + { "id": "US", "name": "United States", "region": "Americas" }, + { "id": "GB", "name": "United Kingdom", "region": "Europe" }, + { "id": "DE", "name": "Germany", "region": "Europe" } + // ... all ~250 ISO countries + ] +} +``` + +**`data/regions.json`**: + +```json +{ + "database": "myapp", + "table": "Region", + "records": [ + { "id": "US-CA", "name": "California", "countryId": "US" }, + { "id": "US-NY", "name": "New York", "countryId": "US" }, + { "id": "GB-ENG", "name": "England", "countryId": "GB" } + // ... + ] +} +``` + +Because the data loader uses content hashing, adding new countries or correcting a name in the file will update only the changed records on the next deployment — existing records that haven't changed are skipped entirely. + +## Related Documentation + +- [Schema](./schema.md) — Defining table structure before loading data +- [Jobs](./jobs.md) — Bulk data operations via the Operations API (CSV/JSON import from file, URL, or S3) +- [Components](TODO:reference_versioned_docs/version-v4/components/overview.md) — Extension and plugin system that the data loader is built on diff --git a/reference_versioned_docs/version-v4/database/jobs.md b/reference_versioned_docs/version-v4/database/jobs.md new file mode 100644 index 00000000..5931746c --- /dev/null +++ b/reference_versioned_docs/version-v4/database/jobs.md @@ -0,0 +1,272 @@ +--- +title: Jobs +--- + + + + + +# Jobs + +Harper uses an asynchronous job system for long-running data operations. When a bulk operation is initiated — such as loading a large CSV file or exporting millions of records — Harper starts a background job and immediately returns a job ID. Use the job ID to check progress and status. 
+ +Job status values: + +- `IN_PROGRESS` — the job is currently running +- `COMPLETE` — the job finished successfully + +## Bulk Operations + +The following operations create jobs. All bulk operations are sent to the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md). + +### CSV Data Load + +Ingests CSV data provided directly in the request body. + +- `operation` _(required)_ — `csv_data_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `data` _(required)_ — CSV content as a string + +```json +{ + "operation": "csv_data_load", + "database": "dev", + "action": "insert", + "table": "breed", + "data": "id,name,country\n1,Labrador,Canada\n2,Poodle,France\n" +} +``` + +Response: + +```json +{ + "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", + "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" +} +``` + +--- + +### CSV File Load + +Ingests CSV data from a file on the server's local filesystem. + +> The CSV file must reside on the same machine running Harper. + +- `operation` _(required)_ — `csv_file_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `file_path` _(required)_ — absolute path to the CSV file on the host + +```json +{ + "operation": "csv_file_load", + "action": "insert", + "database": "dev", + "table": "breed", + "file_path": "/home/user/imports/breeds.csv" +} +``` + +--- + +### CSV URL Load + +Ingests CSV data from a URL. 
+ +- `operation` _(required)_ — `csv_url_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `csv_url` _(required)_ — URL pointing to the CSV file + +```json +{ + "operation": "csv_url_load", + "action": "insert", + "database": "dev", + "table": "breed", + "csv_url": "https://s3.amazonaws.com/mydata/breeds.csv" +} +``` + +--- + +### Import from S3 + +Imports CSV or JSON files from an AWS S3 bucket. + +- `operation` _(required)_ — `import_from_s3` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `s3` _(required)_ — S3 connection details: + - `aws_access_key_id` + - `aws_secret_access_key` + - `bucket` + - `key` — filename including extension (`.csv` or `.json`) + - `region` + +```json +{ + "operation": "import_from_s3", + "action": "insert", + "database": "dev", + "table": "dog", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "dogs.json", + "region": "us-east-1" + } +} +``` + +--- + +### Export Local + +Exports table data to a local file in JSON or CSV format. 
+ +- `operation` _(required)_ — `export_local` +- `format` _(required)_ — `json` or `csv` +- `path` _(required)_ — local directory path where the export file will be written +- `search_operation` _(required)_ — query to select records: `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` + +Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation for exports + +- `filename` _(optional)_ — filename without extension; auto-generated from epoch timestamp if omitted + +```json +{ + "operation": "export_local", + "format": "json", + "path": "/data/exports/", + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.breed" + } +} +``` + +--- + +### Export to S3 + +Exports table data to an AWS S3 bucket in JSON or CSV format. + +Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation + +- `operation` _(required)_ — `export_to_s3` +- `format` _(required)_ — `json` or `csv` +- `s3` _(required)_ — S3 connection details (same fields as Import from S3, plus `key` for the output object name) +- `search_operation` _(required)_ — `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` + +```json +{ + "operation": "export_to_s3", + "format": "json", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "exports/dogs.json", + "region": "us-east-1" + }, + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.dog" + } +} +``` + +--- + +### Delete Records Before + +Deletes records older than a given timestamp from a table. Operates only on the local node — clustered replicas retain their data. + +_Restricted to `super_user` roles._ + +- `operation` _(required)_ — `delete_records_before` +- `schema` _(required)_ — database name +- `table` _(required)_ — table name +- `date` _(required)_ — records with `__createdtime__` before this timestamp are deleted. 
Format: `YYYY-MM-DDThh:mm:ss.sZ` + +```json +{ + "operation": "delete_records_before", + "date": "2024-01-01T00:00:00.000Z", + "schema": "dev", + "table": "breed" +} +``` + +## Managing Jobs + +### Get Job + +Returns status, metrics, and messages for a specific job by ID. + +- `operation` _(required)_ — `get_job` +- `id` _(required)_ — job ID + +```json +{ + "operation": "get_job", + "id": "4a982782-929a-4507-8794-26dae1132def" +} +``` + +Response: + +```json +[ + { + "__createdtime__": 1611615798782, + "__updatedtime__": 1611615801207, + "created_datetime": 1611615798774, + "end_datetime": 1611615801206, + "id": "4a982782-929a-4507-8794-26dae1132def", + "job_body": null, + "message": "successfully loaded 350 of 350 records", + "start_datetime": 1611615798805, + "status": "COMPLETE", + "type": "csv_url_load", + "user": "HDB_ADMIN", + "start_datetime_converted": "2021-01-25T23:03:18.805Z", + "end_datetime_converted": "2021-01-25T23:03:21.206Z" + } +] +``` + +--- + +### Search Jobs by Start Date + +Returns all jobs started within a time window. 
+ +_Restricted to `super_user` roles._ + +- `operation` _(required)_ — `search_jobs_by_start_date` +- `from_date` _(required)_ — start of the search window (ISO 8601 format) +- `to_date` _(required)_ — end of the search window (ISO 8601 format) + +```json +{ + "operation": "search_jobs_by_start_date", + "from_date": "2024-01-01T00:00:00.000+0000", + "to_date": "2024-01-02T00:00:00.000+0000" +} +``` + +## Related Documentation + +- [Data Loader](./data-loader.md) — Component-based data loading as part of deployment +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Sending operations to Harper +- [Transaction Logging](./transaction.md) — Recording a history of changes made to tables diff --git a/reference_versioned_docs/version-v4/database/overview.md b/reference_versioned_docs/version-v4/database/overview.md new file mode 100644 index 00000000..af5b8471 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/overview.md @@ -0,0 +1,123 @@ +--- +title: Overview +--- + + + + + +# Database + +Harper's database system is the foundation of its data storage and retrieval capabilities. It is built on top of [LMDB](https://www.symas.com/lmdb) (Lightning Memory-Mapped Database) and is designed to provide high performance, ACID-compliant storage with automatic indexing and flexible schema support. + +## How Harper Stores Data + +Harper organizes data in a three-tier hierarchy: + +- **Databases** — containers that group related tables together in a single transactional file +- **Tables** — collections of records with a common data pattern +- **Records** — individual data objects with a primary key and any number of attributes + +All tables within a database share the same transaction context, meaning reads and writes across tables in the same database can be performed atomically. + +### The Schema System and Auto-REST + +The most common way to use Harper's database is through the **schema system**. 
By defining a [GraphQL schema](./schema.md), you can: + +- Declare tables and their attribute types +- Control which attributes are indexed +- Define relationships between tables +- Automatically expose data via REST, MQTT, and other interfaces + +You do not need to build custom application code to use the database. A schema definition alone is enough to create fully functional, queryable REST endpoints for your data. + +For more advanced use cases, you can extend table behavior using the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Custom resource logic layered on top of tables'). + +### Architecture Overview + +``` + ┌──────────┐ ┌──────────┐ + │ Clients │ │ Clients │ + └────┬─────┘ └────┬─────┘ + │ │ + ▼ ▼ + ┌────────────────────────────────────────┐ + │ │ + │ Socket routing/management │ + ├───────────────────────┬────────────────┤ + │ │ │ + │ Server Interfaces ─►│ Authentication │ + │ RESTful HTTP, MQTT │ Authorization │ + │ ◄─┤ │ + │ ▲ └────────────────┤ + │ │ │ │ + ├───┼──────────┼─────────────────────────┤ + │ │ │ ▲ │ + │ ▼ Resources ▲ │ ┌───────────┐ │ + │ │ └─┤ │ │ + ├─────────────────┴────┐ │ App │ │ + │ ├─►│ resources │ │ + │ Database tables │ └───────────┘ │ + │ │ ▲ │ + ├──────────────────────┘ │ │ + │ ▲ ▼ │ │ + │ ┌────────────────┐ │ │ + │ │ External │ │ │ + │ │ data sources ├────┘ │ + │ │ │ │ + │ └────────────────┘ │ + │ │ + └────────────────────────────────────────┘ +``` + +## Databases + +Harper databases hold a collection of tables in a single transactionally-consistent file. This means reads and writes can be performed atomically across all tables in the same database, and multi-table transactions are replicated as a single atomic unit. + +The default database is named `data`. Most applications will use this default. 
Additional databases can be created for namespace separation — this is particularly useful for components designed for reuse across multiple applications, where a unique database name avoids naming collisions. + +> **Note:** Transactions do not preserve atomicity across different databases, only across tables within the same database. + +## Tables + +Tables group records with a common data pattern. A table must have: + +- **Table name** — used to identify the table +- **Primary key** — the unique identifier for each record (also referred to as `hash_attribute` in the Operations API) + +Primary keys must be unique. If a primary key is not provided on insert, Harper auto-generates one: + +- A **UUID string** for primary keys typed as `String` or `ID` +- An **auto-incrementing integer** for primary keys typed as `Int`, `Long`, or `Any` + +Numeric primary keys are more efficient than UUIDs for large tables. + +## Dynamic vs. Defined Schemas + +Harper tables can operate in two modes: + +**Defined schemas** (recommended): Tables with schemas explicitly declared using [GraphQL schema syntax](./schema.md). This provides predictable structure, precise control over indexing, and data integrity. Schemas are declared in a component's `schema.graphql` file. + +**Dynamic schemas**: Tables created through the Operations API or Studio without a schema definition. Attributes are reflexively added as data is ingested. All top-level attributes are automatically indexed. Dynamic schema tables automatically maintain `__createdtime__` and `__updatedtime__` audit attributes on every record. + +It is best practice to define schemas for production tables. Dynamic schemas are convenient for experimentation and prototyping. 
+ +## Key Concepts + +For deeper coverage of each database feature, see the dedicated pages in this section: + +- **[Schema](./schema.md)** — Defining table structure, types, indexes, relationships, and computed properties using GraphQL schema syntax +- **[API](./api.md)** — The `tables`, `databases`, `transaction()`, and `createBlob()` globals for interacting with the database from code +- **[Data Loader](./data-loader.md)** — Loading seed or initial data into tables as part of component deployment +- **[Storage Algorithm](./storage-algorithm.md)** — How Harper stores data using LMDB with universal indexing and ACID compliance +- **[Jobs](./jobs.md)** — Asynchronous bulk data operations (CSV import/export, S3 import/export) +- **[System Tables](./system-tables.md)** — Harper internal tables for analytics, data loader state, and other system features +- **[Compaction](./compaction.md)** — Reducing database file size by eliminating fragmentation and free space +- **[Transaction Logging](./transaction.md)** — Recording and querying a history of data changes via audit log and transaction log + +## Related Documentation + +- [REST](../rest/overview.md) — HTTP interface built on top of the database resource system +- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md) — Custom application logic extending database tables +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Direct database management operations (create/drop databases and tables, insert/update/delete records) +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md) — Storage configuration options (compression, blob paths, compaction) diff --git a/reference_versioned_docs/version-v4/database/schema.md b/reference_versioned_docs/version-v4/database/schema.md new file mode 100644 index 00000000..0bd57adb --- /dev/null +++ b/reference_versioned_docs/version-v4/database/schema.md @@ -0,0 +1,503 @@ +--- +title: Schema 
+--- + + + + + + + + + + + +# Schema + +Harper uses GraphQL Schema Definition Language (SDL) to declaratively define table structure. Schema definitions are loaded from `.graphql` files in a component directory and control table creation, attribute types, indexing, and relationships. + +## Overview + +Added in: v4.2.0 + +Schemas are defined using standard [GraphQL type definitions](https://graphql.org/learn/schema/) with Harper-specific directives. A schema definition: + +- Ensures required tables exist when a component is deployed +- Enforces attribute types and required constraints +- Controls which attributes are indexed +- Defines relationships between tables +- Configures computed properties, expiration, and audit behavior + +Schemas are flexible by default — records may include additional properties beyond those declared in the schema. Use the `@sealed` directive to prevent this. + +A minimal example: + +```graphql +type Dog @table { + id: ID @primaryKey + name: String + breed: String + age: Int +} + +type Breed @table { + id: ID @primaryKey + name: String @indexed +} +``` + +### Loading Schemas + +In a component's `config.yaml`, specify the schema file with the `graphqlSchema` plugin: + +```yaml +graphqlSchema: + files: 'schema.graphql' +``` + +Keep in mind that both plugins and applications can specify schemas. + +## Type Directives + +Type directives apply to the entire table type definition. + +### `@table` + +Marks a GraphQL type as a Harper database table. The type name becomes the table name by default. 
+ +```graphql +type MyTable @table { + id: ID @primaryKey +} +``` + +Optional arguments: + +| Argument | Type | Default | Description | +| ------------ | --------- | -------------- | ----------------------------------------------------------------------- | +| `table` | `String` | type name | Override the table name | +| `database` | `String` | `"data"` | Database to place the table in | +| `expiration` | `Int` | — | Auto-expire records after this many seconds (useful for caching tables) | +| `audit` | `Boolean` | config default | Enable audit log for this table | + +**Examples:** + +```graphql +# Override table name +type Product @table(table: "products") { + id: ID @primaryKey +} + +# Place in a specific database +type Order @table(database: "commerce") { + id: ID @primaryKey +} + +# Auto-expire records after 1 hour (e.g., a session cache) +type Session @table(expiration: 3600) { + id: ID @primaryKey + userId: String +} + +# Enable audit log for this table explicitly +type AuditedRecord @table(audit: true) { + id: ID @primaryKey + value: String +} + +# Combine multiple arguments +type Event @table(database: "analytics", expiration: 86400) { + id: Long @primaryKey + name: String @indexed +} +``` + +**Database naming:** Since all tables default to the `data` database, when designing plugins or applications, consider using unique database names to avoid table naming collisions. + +### `@export` + +Exposes the table as an externally accessible resource endpoint, available via REST, MQTT, and other interfaces. + +```graphql +type MyTable @table @export(name: "my-table") { + id: ID @primaryKey +} +``` + +The optional `name` parameter specifies the URL path segment (e.g., `/my-table/`). Without `name`, the type name is used. + +### `@sealed` + +Prevents records from including any properties beyond those explicitly declared in the type. By default, Harper allows records to have additional properties. 
+ +```graphql +type StrictRecord @table @sealed { + id: ID @primaryKey + name: String +} +``` + +## Field Directives + +Field directives apply to individual attributes in a type definition. + +### `@primaryKey` + +Designates the attribute as the table's primary key. Primary keys must be unique; inserts with a duplicate primary key are rejected. + +```graphql +type Product @table { + id: Long @primaryKey + name: String +} +``` + +If no primary key is provided on insert, Harper auto-generates one: + +- **UUID string** — when type is `String` or `ID` +- **Auto-incrementing integer** — when type is `Int`, `Long`, or `Any` + +Changed in: v4.4.0 + +Auto-incrementing integer primary keys were added. Previously only UUID generation was supported for `ID` and `String` types. + +Using `Long` or `Any` is recommended for auto-generated numeric keys. `Int` is limited to 32-bit and may be insufficient for large tables. + +### `@indexed` + +Creates a secondary index on the attribute for fast querying. Required for filtering by this attribute in REST queries, SQL, or NoSQL operations. + +```graphql +type Product @table { + id: ID @primaryKey + category: String @indexed + price: Float @indexed +} +``` + +If the field value is an array, each element in the array is individually indexed, enabling queries by any individual value. + +Null values are indexed by default (added in v4.3.0), enabling queries like `GET /Product/?category=null`. + +### `@createdTime` + +Automatically assigns a creation timestamp (Unix epoch milliseconds) to the attribute when a record is created. + +```graphql +type Event @table { + id: ID @primaryKey + createdAt: Long @createdTime +} +``` + +### `@updatedTime` + +Automatically assigns a timestamp (Unix epoch milliseconds) each time the record is updated. 
+ +```graphql +type Event @table { + id: ID @primaryKey + updatedAt: Long @updatedTime +} +``` + +## Relationships + +Added in: v4.3.0 + +The `@relationship` directive defines how one table relates to another through a foreign key. Relationships enable join queries and allow related records to be selected as nested properties in query results. + +### `@relationship(from: attribute)` — many-to-one or many-to-many + +The foreign key is in this table, referencing the primary key of the target table. + +```graphql +type RealityShow @table @export { + id: ID @primaryKey + networkId: ID @indexed # foreign key + network: Network @relationship(from: networkId) # many-to-one + title: String @indexed +} + +type Network @table @export { + id: ID @primaryKey + name: String @indexed # e.g. "Bravo", "Peacock", "Netflix" +} +``` + +Query shows by network name: + +```http +GET /RealityShow?network.name=Bravo +``` + +If the foreign key is an array, this establishes a many-to-many relationship (e.g., a show with multiple streaming homes): + +```graphql +type RealityShow @table @export { + id: ID @primaryKey + networkIds: [ID] @indexed + networks: [Network] @relationship(from: networkIds) +} +``` + +### `@relationship(to: attribute)` — one-to-many or many-to-many + +The foreign key is in the target table, referencing the primary key of this table. The result type must be an array. + +```graphql +type Network @table @export { + id: ID @primaryKey + name: String @indexed # e.g. "Bravo", "Peacock", "Netflix" + shows: [RealityShow] @relationship(to: networkId) # one-to-many + # shows like "Real Housewives of Atlanta", "The Traitors", "Vanderpump Rules" +} +``` + +### `@relationship(from: attribute, to: attribute)` — foreign key to foreign key + +Both `from` and `to` can be specified together to define a relationship where neither side uses the primary key — a foreign key to foreign key join. This is useful for many-to-many relationships that join on non-primary-key attributes. 
+ +```graphql +type OrderItem @table @export { + id: ID @primaryKey + orderId: ID @indexed + productSku: ID @indexed + product: Product @relationship(from: productSku, to: sku) # join on sku, not primary key +} + +type Product @table @export { + id: ID @primaryKey + sku: ID @indexed + name: String +} +``` + +Schemas can also define self-referential relationships, enabling parent-child hierarchies within a single table. + +## Computed Properties + +Added in: v4.4.0 + +The `@computed` directive marks a field as derived from other fields at query time. Computed properties are not stored in the database but are evaluated when the field is accessed. + +```graphql +type Product @table { + id: ID @primaryKey + price: Float + taxRate: Float + totalPrice: Float @computed(from: "price + (price * taxRate)") +} +``` + +The `from` argument is a JavaScript expression that can reference other record fields. + +Computed properties can also be defined in JavaScript for complex logic: + +```graphql +type Product @table { + id: ID @primaryKey + totalPrice: Float @computed +} +``` + +```javascript +tables.Product.setComputedAttribute('totalPrice', (record) => { + return record.price + record.price * record.taxRate; +}); +``` + +Computed properties are not included in query results by default — use `select` to include them explicitly. 
+
+### Computed Indexes
+
+Computed properties can be indexed with `@indexed`, enabling custom indexing strategies such as composite indexes, full-text search, or vector indexing:
+
+```graphql
+type Product @table {
+ id: ID @primaryKey
+ tags: String
+ tagsSeparated: [String] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed
+}
+```
+
+When using a JavaScript function for an indexed computed property, use the `version` argument to ensure re-indexing when the function changes:
+
+```graphql
+type Product @table {
+ id: ID @primaryKey
+ totalPrice: Float @computed(version: 1) @indexed
+}
+```
+
+Increment `version` whenever the computation function changes. Failing to do so can result in an inconsistent index.
+
+## Vector Indexing
+
+Added in: v4.6.0
+
+Use `@indexed(type: "HNSW")` to create a vector index using the Hierarchical Navigable Small World algorithm, designed for fast approximate nearest-neighbor search on high-dimensional vectors.
+
+```graphql
+type Document @table {
+ id: Long @primaryKey
+ textEmbeddings: [Float] @indexed(type: "HNSW")
+}
+```
+
+Query by nearest neighbors using the `sort` parameter:
+
+```javascript
+let results = Document.search({
+ sort: { attribute: 'textEmbeddings', target: searchVector },
+ limit: 5,
+});
+```
+
+HNSW can be combined with filter conditions:
+
+```javascript
+let results = Document.search({
+ conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }],
+ sort: { attribute: 'textEmbeddings', target: searchVector },
+ limit: 5,
+});
+```
+
+### HNSW Parameters
+
+| Parameter | Default | Description |
+| ---------------------- | ----------------- | --------------------------------------------------------------------------------------------------- |
+| `distance` | `"cosine"` | Distance function: `"euclidean"` or `"cosine"` (negative cosine similarity) |
+| `efConstruction` | `100` | Max nodes explored during index construction.
Higher = better recall, lower = better performance | +| `M` | `16` | Preferred connections per graph layer. Higher = more space, better recall for high-dimensional data | +| `optimizeRouting` | `0.5` | Heuristic aggressiveness for omitting redundant connections (0 = off, 1 = most aggressive) | +| `mL` | computed from `M` | Normalization factor for level generation | +| `efSearchConstruction` | `50` | Max nodes explored during search | + +Example with custom parameters: + +```graphql +type Document @table { + id: Long @primaryKey + textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100) +} +``` + +## Field Types + +Harper supports the following field types: + +| Type | Description | +| --------- | ---------------------------------------------------------------------------------------------- | +| `String` | Unicode text, UTF-8 encoded | +| `Int` | 32-bit signed integer (−2,147,483,648 to 2,147,483,647) | +| `Long` | 54-bit signed integer (−9,007,199,254,740,992 to 9,007,199,254,740,992) | +| `Float` | 64-bit double precision floating point | +| `BigInt` | Integer up to ~300 digits. Note: distinct JavaScript type; handle appropriately in custom code | +| `Boolean` | `true` or `false` | +| `ID` | String; indicates a non-human-readable identifier | +| `Any` | Any primitive, object, or array | +| `Date` | JavaScript `Date` object | +| `Bytes` | Binary data as `Buffer` or `Uint8Array` | +| `Blob` | Binary large object; designed for streaming content >20KB | + +Added `BigInt` in v4.3.0 + +Added `Blob` in v4.5.0 + +Arrays of a type are expressed with `[Type]` syntax (e.g., `[Float]` for a vector). + +### Blob Type + +Added in: v4.5.0 + +`Blob` fields are designed for large binary content. Harper's `Blob` type implements the [Web API `Blob` interface](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so all standard `Blob` methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`) are available. 
Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to be held entirely in memory. Use `Blob` for content typically larger than 20KB (images, video, audio, large HTML, etc.).
+
+See [Blob usage details](#blob-usage) below.
+
+#### Blob Usage
+
+Declare a blob field:
+
+```graphql
+type MyTable @table {
+ id: Any! @primaryKey
+ data: Blob
+}
+```
+
+Create and store a blob using [`createBlob()`](./api.md#createblobdata-options):
+
+```javascript
+let blob = createBlob(largeBuffer);
+await MyTable.put({ id: 'my-record', data: blob });
+```
+
+Retrieve blob data using standard Web API `Blob` methods:
+
+```javascript
+let record = await MyTable.get('my-record');
+let buffer = await record.data.bytes(); // Uint8Array
+let text = await record.data.text(); // string
+let stream = record.data.stream(); // ReadableStream
+```
+
+Blobs support asynchronous streaming, meaning a record can reference a blob before it is fully written to storage. Use `saveBeforeCommit: true` to wait for full write before committing:
+
+```javascript
+let blob = createBlob(stream, { saveBeforeCommit: true });
+await MyTable.put({ id: 'my-record', data: blob });
+```
+
+Any string or buffer assigned to a `Blob` field in a `put`, `patch`, or `publish` is automatically coerced to a `Blob`.
+
+When returning a blob via REST, register an error handler to handle interrupted streams:
+
+```javascript
+export class MyEndpoint extends MyTable {
+ async get(target) {
+ const record = await super.get(target);
+ let blob = record.data;
+ blob.on('error', () => {
+ MyTable.invalidate(target);
+ });
+ return { status: 200, headers: {}, body: blob };
+ }
+}
+```
+
+## Dynamic Schema Behavior
+
+When a table is created through the Operations API or Studio without a schema definition, it follows dynamic schema behavior:
+
+- Attributes are reflexively created as data is ingested
+- All top-level attributes are automatically indexed
+- Records automatically get `__createdtime__` and `__updatedtime__` audit attributes
+
+Dynamic schema tables are additive — new attributes are added as new data arrives. Existing records will have `null` for any newly added attributes.
+
+Use `create_attribute` and `drop_attribute` operations to manually manage attributes on dynamic schema tables. See the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'NoSQL and database operations') for details.
+
+## OpenAPI Specification
+
+Tables exported with `@export` are described via an `/openapi` endpoint on the main HTTP server associated with the REST service (default port 9926).
+
+```http
+GET http://localhost:9926/openapi
+```
+
+This provides an OpenAPI 3.x description of all exported resource endpoints. The endpoint is a starting guide and may not cover every edge case.
+
+## Renaming Tables
+
+Harper does **not** support renaming tables. Changing a type name in a schema definition creates a new, empty table — the original table and its data are unaffected.
+ +## Related Documentation + +- [JavaScript API](./api.md) — `tables`, `databases`, `transaction()`, and `createBlob()` globals for working with schema-defined tables in code +- [Data Loader](./data-loader.md) — Seed tables with initial data alongside schema deployment +- [REST Querying](../rest/querying.md) — Querying tables via HTTP using schema-defined attributes and relationships +- [Resources](TODO:reference_versioned_docs/version-v4/resources/resource-api.md) — Extending table behavior with custom application logic +- [Storage Algorithm](./storage-algorithm.md) — How Harper indexes and stores schema-defined data +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'graphqlSchema component and storage options') — Component configuration for schemas diff --git a/reference_versioned_docs/version-v4/database/storage-algorithm.md b/reference_versioned_docs/version-v4/database/storage-algorithm.md new file mode 100644 index 00000000..346f2cb4 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/storage-algorithm.md @@ -0,0 +1,111 @@ +--- +title: Storage Algorithm +--- + + + + +# Storage Algorithm + +Harper's storage algorithm is the foundation of all database functionality. It is built on top of [LMDB](https://www.symas.com/lmdb) (Lightning Memory-Mapped Database), a high-performance key-value store, and extends it with automatic indexing, query-language-agnostic data access, and ACID compliance. + +## Query Language Agnostic + +Harper's storage layer is decoupled from any specific query language. Data inserted via NoSQL operations can be read via SQL, REST, or the Resource API — all accessing the same underlying storage. This architecture allows Harper to add new query interfaces without changing how data is stored. 
+ +## ACID Compliance + +Harper provides full ACID compliance on each node using Multi-Version Concurrency Control (MVCC) through LMDB: + +- **Atomicity**: All writes in a transaction either fully commit or fully roll back +- **Consistency**: Each transaction moves data from one valid state to another +- **Isolation**: Readers and writers operate independently — readers do not block writers and writers do not block readers +- **Durability**: Committed transactions are persisted to disk + +Each Harper table has a single writer process, eliminating deadlocks and ensuring writes are executed in the order received. Multiple reader processes can operate concurrently for high-throughput reads. + +## Universally Indexed + +Changed in: v4.3.0 — Storage performance improvements including better free-space management + +For [dynamic schema tables](./overview.md#dynamic-vs-defined-schemas), all top-level attributes are automatically indexed immediately upon ingestion — Harper reflexively creates the attribute and its index as new data arrives. For [schema-defined tables](./schema.md), indexes are created for all attributes marked with `@indexed`. + +Indexes are type-agnostic, ordering values as follows: + +1. Booleans +2. Numbers (ordered numerically) +3. Strings (ordered lexically) + +### LMDB Storage Layout + +Within the LMDB implementation, table records are grouped into a single LMDB environment file. Each attribute index is stored as a sub-database (`dbi`) within that environment. + +## Compression + +Changed in: v4.3.0 — Compression is now enabled by default for all records over 4KB + +Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage configuration options'). Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). 
+ +## Performance Characteristics + +Harper inherits the following performance properties from LMDB: + +- **Memory-mapped I/O**: Data is accessed via memory mapping, enabling fast reads without data duplication between disk and memory +- **Buffer cache integration**: Fully exploits the OS buffer cache for reduced I/O +- **CPU cache optimization**: Built to maximize data locality within CPU caches +- **Deadlock-free writes**: Full serialization of writers guarantees write ordering without deadlocks +- **Zero-copy reads**: Readers access data directly from the memory map without copying + +## Indexing Example + +Given a table with records like this: + +``` +┌────┬────────┬────────┐ +│ id │ field1 │ field2 │ +├────┼────────┼────────┤ +│ 1 │ A │ X │ +│ 2 │ 25 │ X │ +│ 3 │ -1 │ Y │ +│ 4 │ A │ │ +│ 5 │ true │ 2 │ +└────┴────────┴────────┘ +``` + +Harper maintains three separate LMDB sub-databases for that table: + +``` +Table (LMDB environment file) +│ +├── primary index: id +│ ┌─────┬──────────────────────────────────────┐ +│ │ Key │ Value (full record) │ +│ ├─────┼──────────────────────────────────────┤ +│ │ 1 │ { id:1, field1:"A", field2:"X" } │ +│ │ 2 │ { id:2, field1:25, field2:"X" } │ +│ │ 3 │ { id:3, field1:-1, field2:"Y" } │ +│ │ 4 │ { id:4, field1:"A" } │ +│ │ 5 │ { id:5, field1:true, field2:2 } │ +│ └─────┴──────────────────────────────────────┘ +│ +├── secondary index: field1 secondary index: field2 +│ ┌────────┬───────┐ ┌────────┬───────┐ +│ │ Key │ Value │ │ Key │ Value │ +│ ├────────┼───────┤ ├────────┼───────┤ +│ │ -1 │ 3 │ │ 2 │ 5 │ +│ │ 25 │ 2 │ │ X │ 1 │ +│ │ A │ 1 │ │ X │ 2 │ +│ │ A │ 4 │ │ Y │ 3 │ +│ │ true │ 5 │ └────────┴───────┘ +│ └────────┴───────┘ +``` + +Secondary indexes store the attribute value as the key and the record's primary key (`id`) as the value. To resolve a query result, Harper looks up the matching ids in the secondary index, then fetches the full records from the primary index. 
+ +Indexes are ordered — booleans first, then numbers (numerically), then strings (lexically) — enabling efficient range queries across all types. + +## Related Documentation + +- [Schema](./schema.md) — Defining indexed attributes and vector indexes +- [Compaction](./compaction.md) — Reclaiming free space and applying new storage configuration to existing databases +- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage section') — Storage configuration options (compression, memory maps, blob paths) diff --git a/reference_versioned_docs/version-v4/database/system-tables.md b/reference_versioned_docs/version-v4/database/system-tables.md new file mode 100644 index 00000000..6b457e9f --- /dev/null +++ b/reference_versioned_docs/version-v4/database/system-tables.md @@ -0,0 +1,158 @@ +--- +title: System Tables +--- + + + + + +# System Tables + +Harper maintains a set of internal system tables in the `system` database. These tables store analytics, job tracking, replication configuration, and other internal state. Most are read-only from the application perspective; some can be queried for observability or management purposes. + +System tables are prefixed with `hdb_` and reside in the `system` database. + +## Analytics Tables + +Added in: v4.5.0 (resource and storage analytics expansion) + +### `hdb_raw_analytics` + +Stores per-second, per-thread performance metrics. Records are written once per second (when there is activity) and include metrics for all operations, URL endpoints, and messaging topics, plus system resource information such as memory and CPU utilization. + +Records have a primary key equal to the timestamp in milliseconds since Unix epoch. 
+ +Query with `search_by_conditions` (requires `superuser` permission): + +```json +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_raw_analytics", + "conditions": [ + { + "search_attribute": "id", + "search_type": "between", + "search_value": [1688594000000, 1688594010000] + } + ] +} +``` + +A typical record: + +```json +{ + "time": 1688594390708, + "period": 1000.8336279988289, + "metrics": [ + { + "metric": "bytes-sent", + "path": "search_by_conditions", + "type": "operation", + "median": 202, + "mean": 202, + "p95": 202, + "p90": 202, + "count": 1 + }, + { + "metric": "memory", + "threadId": 2, + "rss": 1492664320, + "heapTotal": 124596224, + "heapUsed": 119563120, + "external": 3469790, + "arrayBuffers": 798721 + }, + { + "metric": "utilization", + "idle": 138227.52767700003, + "active": 70.5066209952347, + "utilization": 0.0005098165086230495 + } + ], + "threadId": 2, + "totalBytesProcessed": 12182820, + "id": 1688594390708.6853 +} +``` + +### `hdb_analytics` + +Stores per-minute aggregate analytics. Once per minute, Harper aggregates all per-second raw entries from all threads into summary records in this table. Query it for longer-term performance trends. + +```json +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_analytics", + "conditions": [ + { + "search_attribute": "id", + "search_type": "between", + "search_value": [1688194100000, 1688594990000] + } + ] +} +``` + +A typical aggregate record: + +```json +{ + "period": 60000, + "metric": "bytes-sent", + "method": "connack", + "type": "mqtt", + "median": 4, + "mean": 4, + "p95": 4, + "p90": 4, + "count": 1, + "id": 1688589569646, + "time": 1688589569646 +} +``` + +For a full reference of available metrics and their fields, see [Analytics](../analytics/overview.md 'Complete analytics metrics reference'). 
+ +## Data Loader Table + +### `hdb_dataloader_hash` + +Added in: v4.6.0 + +Used internally by the [Data Loader](./data-loader.md) to track which records have been loaded and detect changes. Stores SHA-256 content hashes of data file records so that unchanged records are not re-written on subsequent deployments. + +This table is managed automatically by the Data Loader. No direct interaction is required. + +## Replication Tables + +### `hdb_nodes` + +Stores the configuration and state of known nodes in a cluster, including connection details, replication settings, and revoked certificate serial numbers. + +Can be queried to inspect the current replication topology: + +```json +{ + "operation": "search_by_hash", + "schema": "system", + "table": "hdb_nodes", + "hash_values": ["node-id"] +} +``` + +Used by the `add_node`, `update_node`, and related clustering operations. See [Replication](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Clustering and node management') for details. + +### `hdb_certificate` + +Stores TLS certificates used in replication. Can be queried to inspect the certificates currently known to the cluster. 
+ +## Related Documentation + +- [Analytics](../analytics/overview.md) — Full reference for analytics metrics tracked in `hdb_analytics` and `hdb_raw_analytics` +- [Data Loader](./data-loader.md) — Component that writes to `hdb_dataloader_hash` +- [Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md) — Clustering and replication system that uses `hdb_nodes` and `hdb_certificate` +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Querying system tables using `search_by_conditions` diff --git a/reference_versioned_docs/version-v4/database/transaction.md b/reference_versioned_docs/version-v4/database/transaction.md new file mode 100644 index 00000000..65cbc697 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/transaction.md @@ -0,0 +1,154 @@ +--- +title: Transaction Logging +--- + + + + + + + + +# Transaction Logging + +Harper provides two complementary mechanisms for recording a history of data changes on a table: the **audit log** and the **transaction log**. Both are available at the table level and serve different use cases. + +| Feature | Audit Log | Transaction Log | +| ----------------------------- | --------------------------------- | ------------------------------ | +| Storage | Standard Harper table (per-table) | Clustering streams (per-table) | +| Requires clustering | No | Yes | +| Available since | v4.1.0 | v4.1.0 | +| Stores original record values | Yes | No | +| Query by username | Yes | No | +| Query by primary key | Yes | No | +| Used for real-time messaging | Yes (required) | No | + +## Audit Log + +Available since: v4.1.0 + +The audit log is a data store that tracks every transaction across all tables in a database. Harper automatically creates and maintains a single audit log per database. The audit log captures the operation type, the user who made the change, the timestamp, and both the new and original record values. + +The audit log is **enabled by default**. 
To disable it, set [`logging.auditLog`](../logging/configuration.md) to `false` in `harperdb-config.yaml` and restart Harper. + +> The audit log is required for real-time messaging (WebSocket and MQTT subscriptions) and replication. Do not disable it if real-time features or replication are in use. + +### Audit Log Operations + +#### `read_audit_log` + +Queries the audit log for a specific table. Supports filtering by timestamp, username, or primary key value. + +**By timestamp:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "timestamp", + "search_values": [1660585740558] +} +``` + +Timestamp behavior: + +| `search_values` | Result | +| --------------- | ---------------------------------------- | +| `[]` | All records for the table | +| `[timestamp]` | All records after the provided timestamp | +| `[from, to]` | Records between the two timestamps | + +**By username:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "username", + "search_values": ["admin"] +} +``` + +**By primary key:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "hash_value", + "search_values": [318] +} +``` + +**Response example:** + +```json +{ + "operation": "update", + "user_name": "HDB_ADMIN", + "timestamp": 1607035559122.277, + "hash_values": [1, 2], + "records": [ + { + "id": 1, + "breed": "Muttzilla", + "age": 6, + "__updatedtime__": 1607035559122 + } + ], + "original_records": [ + { + "__createdtime__": 1607035556801, + "__updatedtime__": 1607035556801, + "age": 5, + "breed": "Mutt", + "id": 1, + "name": "Harper" + } + ] +} +``` + +The `original_records` field contains the record state before the operation was applied. + +#### `delete_audit_logs_before` + +Deletes audit log entries older than the specified timestamp. 
+ +Changed in: v4.3.0 — Audit log cleanup improved to reduce resource consumption during scheduled cleanups + +Changed in: v4.5.0 — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold + +```json +{ + "operation": "delete_audit_logs_before", + "schema": "dev", + "table": "dog", + "timestamp": 1598290282817 +} +``` + +--- + +## Enabling Audit Log Per Table + +You can enable or disable the audit log for individual tables using the `@table` directive's `audit` argument in your schema: + +```graphql +type Dog @table(audit: true) { + id: ID @primaryKey + name: String +} +``` + +This overrides the [`logging.auditLog`](../logging/configuration.md) global configuration for that specific table. + +## Related Documentation + +- [Logging](../logging/overview.md) — Application and system logging (separate from transaction/audit logging) +- [Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md) — Clustering setup required for transaction logs +- [Logging Configuration](../logging/configuration.md) — Global audit log configuration (`logging.auditLog`) +- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Sending operations to Harper diff --git a/reference_versioned_docs/version-v4/logging/configuration.md b/reference_versioned_docs/version-v4/logging/configuration.md index 76b4396d..659296fd 100644 --- a/reference_versioned_docs/version-v4/logging/configuration.md +++ b/reference_versioned_docs/version-v4/logging/configuration.md @@ -102,7 +102,7 @@ Default: `false` Enables audit (table transaction) logging. When enabled, Harper records every insert, update, and delete to a corresponding audit table. Audit log data is accessed via the `read_audit_log` operation. -See [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') for details on using audit logs. 
+See [Database / Transaction Logging](../database/transaction.md) for details on using audit logs. ```yaml logging: @@ -366,5 +366,5 @@ http: - [Logging Overview](./overview) - [Logging API](./api) - [Logging Operations](./operations) -- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') +- [Database / Transaction Logging](../database/transaction.md) - [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') diff --git a/reference_versioned_docs/version-v4/logging/operations.md b/reference_versioned_docs/version-v4/logging/operations.md index ab288b88..b4dbab5f 100644 --- a/reference_versioned_docs/version-v4/logging/operations.md +++ b/reference_versioned_docs/version-v4/logging/operations.md @@ -8,7 +8,7 @@ title: Logging Operations Operations for reading the standard Harper log (`hdb.log`). All operations are restricted to `super_user` roles only. -> Audit log and transaction log operations (`read_audit_log`, `read_transaction_log`, `delete_audit_logs_before`, `delete_transaction_logs_before`) are documented in [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging operations'). +> Audit log and transaction log operations (`read_audit_log`, `read_transaction_log`, `delete_audit_logs_before`, `delete_transaction_logs_before`) are documented in [Database / Transaction Logging](../database/transaction.md). 
--- @@ -87,5 +87,5 @@ _Restricted to super_user roles only._ - [Logging Overview](./overview) - [Logging Configuration](./configuration) -- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit log and transaction log operations') +- [Database / Transaction Logging](../database/transaction.md) - [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') diff --git a/reference_versioned_docs/version-v4/logging/overview.md b/reference_versioned_docs/version-v4/logging/overview.md index 1bada4a3..862aa01c 100644 --- a/reference_versioned_docs/version-v4/logging/overview.md +++ b/reference_versioned_docs/version-v4/logging/overview.md @@ -10,7 +10,7 @@ title: Logging Harper's core logging system is used for diagnostics, monitoring, and observability. It has an extensive configuration system, and even supports feature-specific (per-component) configurations in latest versions. Furthermore, the `logger` global API is available for creating custom logs from any JavaScript application or plugin code. -> If you are looking for information on Harper's Audit and Transaction logging system, refer to the [Database](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') section. +> If you are looking for information on Harper's Audit and Transaction logging system, refer to the [Database](../database/transaction.md) section. 
## Log File @@ -89,4 +89,4 @@ The `logger` global provides `trace`, `debug`, `info`, `warn`, `error`, `fatal`, - [Logging Configuration](./configuration) - [Logging API](./api) - [Logging Operations](./operations) -- [Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md 'Audit and transaction logging') +- [Database / Transaction Logging](../database/transaction.md) diff --git a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md index bb9d7c55..829730b7 100644 --- a/reference_versioned_docs/version-v4/mqtt/overview.md +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -23,7 +23,7 @@ A topic of `my-resource/some-id` corresponds to the record with id `some-id` in - **Publishing** with the `retain` flag set replaces the record in the database (equivalent to a PUT operation). - **Publishing without** the `retain` flag delivers the message to current subscribers without writing to the database. 
-Defining a table that creates a topic can be as simple as adding a table with no attributes to your [schema.graphql](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition for defining tables and topics') in a Harper application: +Defining a table that creates a topic can be as simple as adding a table with no attributes to your [schema.graphql](../database/schema.md) in a Harper application: ```graphql type MyTopic @table @export @@ -138,5 +138,5 @@ Available events: - [MQTT Configuration](./configuration) - [HTTP Overview](../http/overview.md) - [Security Overview](../security/overview.md) -- [Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Defining tables and topics with schema.graphql') +- [Database Schema](../database/schema.md) - [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface — same path conventions as MQTT topics') diff --git a/reference_versioned_docs/version-v4/rest/overview.md b/reference_versioned_docs/version-v4/rest/overview.md index 54574101..e38b2e4c 100644 --- a/reference_versioned_docs/version-v4/rest/overview.md +++ b/reference_versioned_docs/version-v4/rest/overview.md @@ -17,7 +17,7 @@ Harper provides a powerful, efficient, and standard-compliant HTTP REST interfac Harper's REST interface exposes database tables and custom resources as RESTful endpoints. Tables are **not** exported by default; they must be explicitly exported in a schema definition. The name of the exported resource defines the base of the endpoint path, served on the application HTTP server port (default `9926`). -For more on defining schemas and exporting resources, see [TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition']. +For more on defining schemas and exporting resources, see [Database / Schema](../database/schema.md). 
## Configuration @@ -156,4 +156,4 @@ GET /openapi - [WebSockets](./websockets.md) — Real-time connections via WebSocket - [Server-Sent Events](./server-sent-events.md) — One-way streaming via SSE - [HTTP Server](../http/overview.md) — Underlying HTTP server configuration -- [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — How to define and export resources +- [Database / Schema](../database/schema.md) — How to define and export resources diff --git a/reference_versioned_docs/version-v4/rest/querying.md b/reference_versioned_docs/version-v4/rest/querying.md index 83070f56..49662cbc 100644 --- a/reference_versioned_docs/version-v4/rest/querying.md +++ b/reference_versioned_docs/version-v4/rest/querying.md @@ -251,11 +251,11 @@ This only works for properties declared in the schema. As of v4.5.0, dots in URL Added in: v4.5.0 -Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema and resource configuration') for configuration details. +Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](../database/schema.md) for configuration details. 
## See Also - [REST Overview](./overview.md) — HTTP methods, URL structure, and caching - [Headers](./headers.md) — Request and response headers - [Content Types](./content-types.md) — Encoding formats -- [Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — Defining schemas, relationships, and indexes +- [Database / Schema](../database/schema.md) — Defining schemas, relationships, and indexes diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 299587d6..159180ce 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -294,6 +294,59 @@ } ] }, + { + "type": "category", + "label": "Database", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "database/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "database/schema", + "label": "Schema" + }, + { + "type": "doc", + "id": "database/api", + "label": "API" + }, + { + "type": "doc", + "id": "database/data-loader", + "label": "Data Loader" + }, + { + "type": "doc", + "id": "database/storage-algorithm", + "label": "Storage Algorithm" + }, + { + "type": "doc", + "id": "database/jobs", + "label": "Jobs" + }, + { + "type": "doc", + "id": "database/system-tables", + "label": "System Tables" + }, + { + "type": "doc", + "id": "database/compaction", + "label": "Compaction" + }, + { + "type": "doc", + "id": "database/transaction", + "label": "Transaction Logging" + } + ] + }, { "type": "category", "label": "Legacy", diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 42ce6c11..8698c3cb 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -295,6 +295,7 @@ Based on migration map and reference plan, recommend this order. Each section is 4. 
**Database** (`reference_versioned_docs/version-v4/database/`) - `overview.md` - `schema.md` + - `api.md` _(JS globals: `tables`, `databases`, `transaction()`, `createBlob()`)_ - `data-loader.md` - `storage-algorithm.md` - `jobs.md` diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 479c7eee..1757f1dd 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -348,6 +348,19 @@ Broken out from the security section during migration — RBAC warrants its own - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Compact database functionality +### `reference/database/api.md` + +- **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` (tables, databases globals) +- **Additional Sources**: + - `versioned_docs/version-4.7/reference/transactions.md` (transaction() function) + - `versioned_docs/version-4.7/reference/blob.md` (createBlob() function) +- **Merge Required**: Yes — combines tables/databases globals, transaction(), and createBlob() into one page +- **Version Annotations**: Blob type added in v4.5.0 +- **Status**: In Progress +- **Notes**: Covers the JS globals most relevant to database interaction. Server globals (server.http, server.ws, etc.) belong in the HTTP/Resources sections. +- **Release Notes**: + - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Blob storage and createBlob() added + ### `reference/database/transaction.md` - **Primary Source**: `versioned_docs/version-4.7/administration/logging/transaction-logging.md` From 625fa2b615e6079bf4b082100c10b2bdedd67174 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Thu, 26 Mar 2026 18:09:08 -0600 Subject: [PATCH 25/51] Resources Section Migration (#459) * docs: migrate Resources section to v4 consolidated reference Adds overview, resource-api, global-apis, and query-optimization pages for the Resources section of the v4 reference docs. Co-Authored-By: Claude Sonnet 4.6 * fixup! docs: migrate Resources section to v4 consolidated reference * fixup! 
docs: migrate Resources section to v4 consolidated reference * fixup! docs: migrate Resources section to v4 consolidated reference * fixup! docs: migrate Resources section to v4 consolidated reference * fixup! docs: migrate Resources section to v4 consolidated reference * fixup! docs: migrate Resources section to v4 consolidated reference * fixup! docs: migrate Resources section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .../resources-link-placeholders.md | 21 + .../version-v4/http/api.md | 2 +- .../version-v4/resources/overview.md | 130 ++++ .../resources/query-optimization.md | 219 ++++++ .../version-v4/resources/resource-api.md | 633 ++++++++++++++++++ .../version-v4/security/api.md | 23 + .../version-v4/security/overview.md | 4 + .../version-v4-sidebars.json | 28 + 8 files changed, 1059 insertions(+), 1 deletion(-) create mode 100644 migration-context/link-placeholders/resources-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/resources/overview.md create mode 100644 reference_versioned_docs/version-v4/resources/query-optimization.md create mode 100644 reference_versioned_docs/version-v4/resources/resource-api.md create mode 100644 reference_versioned_docs/version-v4/security/api.md diff --git a/migration-context/link-placeholders/resources-link-placeholders.md b/migration-context/link-placeholders/resources-link-placeholders.md new file mode 100644 index 00000000..02f673ac --- /dev/null +++ b/migration-context/link-placeholders/resources-link-placeholders.md @@ -0,0 +1,21 @@ +# Link Placeholders for Resources + +## reference_versioned_docs/version-v4/resources/overview.md + +- Line 27: `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` + - Context: Explaining that the REST plugin maps HTTP requests to Resource methods + - Target should be: REST section overview page + +- Line 28: `[MQTT 
Overview](TODO:reference_versioned_docs/version-v4/mqtt/overview.md)` + - Context: Explaining that the MQTT plugin routes publish/subscribe to Resource methods + - Target should be: MQTT section overview page + +## reference_versioned_docs/version-v4/resources/resource-api.md + +No TODO placeholders — all cross-section links use relative paths for in-section links and section-relative paths otherwise. The global-apis.md has the external TODOs listed above. + +## reference_versioned_docs/version-v4/resources/query-optimization.md + +- Line 57: `[Schema documentation](TODO:reference_versioned_docs/version-v4/database/schema.md)` + - Context: Pointing to where relationship directives are defined in schemas + - Target should be: Database section schema page (not yet merged as of this PR) diff --git a/reference_versioned_docs/version-v4/http/api.md b/reference_versioned_docs/version-v4/http/api.md index e523f331..2690913e 100644 --- a/reference_versioned_docs/version-v4/http/api.md +++ b/reference_versioned_docs/version-v4/http/api.md @@ -398,4 +398,4 @@ contentTypes.set('text/xml', { - [HTTP Overview](./overview) - [HTTP Configuration](./configuration) - [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') -- [Global APIs](TODO:reference_versioned_docs/version-v4/resources/global-apis.md 'All global APIs including tables, databases, Resource, logger, auth') +- [Security API](../security/api.md) diff --git a/reference_versioned_docs/version-v4/resources/overview.md b/reference_versioned_docs/version-v4/resources/overview.md new file mode 100644 index 00000000..9dbad99f --- /dev/null +++ b/reference_versioned_docs/version-v4/resources/overview.md @@ -0,0 +1,130 @@ +--- +title: Resources Overview +--- + + + + +# Resources + +Harper's Resource API is the foundation for building custom data access logic and connecting data sources. 
Resources are JavaScript classes that define how data is accessed, modified, subscribed to, and served over HTTP, MQTT, and WebSocket protocols. + +## What Is a Resource? + +A **Resource** is a class that provides a unified interface for a set of records or entities. Harper's built-in tables extend the base `Resource` class, and you can extend either `Resource` or a table class to implement custom behavior for any data source — internal or external. + +Added in: v4.2.0 + +The Resource API is designed to mirror REST/HTTP semantics: methods map directly to HTTP verbs (`get`, `put`, `patch`, `post`, `delete`), making it straightforward to build API endpoints alongside custom data logic. + +## Relationship to Other Features + +- **Database tables** extend `Resource` automatically. You can use tables through the Resource API without writing any custom code. +- The **REST plugin** maps incoming HTTP requests to Resource methods. See [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST plugin reference'). +- The **MQTT plugin** routes publish/subscribe messages to `publish` and `subscribe` Resource methods. See [MQTT Overview](TODO:reference_versioned_docs/version-v4/mqtt/overview.md 'MQTT plugin reference'). +- **Global APIs** (`tables`, `databases`, `transaction`) provide access to resources from JavaScript code. +- The **`jsResource` plugin** (configured in `config.yaml`) registers a JavaScript file's exported Resource classes as endpoints. + +## Resource API Versions + +The Resource API has two behavioral modes controlled by the `loadAsInstance` static property: + +- **V2 (recommended, `loadAsInstance = false`)**: Instance methods receive a `RequestTarget` as the first argument; no record is preloaded onto `this`. Recommended for all new code. +- **V1 (legacy, `loadAsInstance = true`)**: Instance methods are called with `this` pre-bound to the matching record. Preserved for backwards compatibility. 
+
+The [Resource API reference](./resource-api.md) is written against V2. For V1 behavior and migration guidance, see the legacy instance binding section of that page.
+
+## Extending a Table
+
+The most common use case is extending an existing table to add custom logic.
+
+Starting with a table definition in a `schema.graphql`:
+
+```graphql
+# Omit the `@export` directive
+type MyTable @table {
+  id: ID @primaryKey
+  # ...
+}
+```
+
+> For more info on the schema API see [`Database / Schema`](../database/schema.md)
+
+Then, in a `resources.js` extend from the `tables.MyTable` global:
+
+```javascript
+export class MyTable extends tables.MyTable {
+  static loadAsInstance = false; // use V2 API
+
+  async get(target) {
+    // add a computed property before returning
+
+    const record = await super.get(target);
+
+    return { ...record, computedField: 'value' };
+  }
+
+  post(target, data) {
+    // custom action on POST
+    this.create({ ...data, status: 'pending' });
+  }
+}
+```
+
+Finally, ensure everything is configured appropriately:
+
+```yaml
+rest: true
+graphqlSchema:
+  files: schema.graphql
+jsResource:
+  files: resources.js
+```
+
+## Custom External Data Source
+
+You can also extend the base `Resource` class directly to implement custom endpoints, or even wrap an external API or service as a custom caching layer:
+
+```javascript
+export class CustomEndpoint extends Resource {
+  static loadAsInstance = false;
+
+  get(target) {
+    return {
+      data: doSomething(),
+    };
+  }
+}
+
+export class MyExternalData extends Resource {
+  static loadAsInstance = false;
+
+  async get(target) {
+    const response = await fetch(`https://api.example.com/${target.id}`);
+    return response.json();
+  }
+
+  put(target, data) {
+    return fetch(`https://api.example.com/${target.id}`, {
+      method: 'PUT',
+      body: JSON.stringify(data),
+    });
+  }
+}
+
+// Use as a cache source for a local table
+tables.MyCache.sourcedFrom(MyExternalData);
+```
+
+Resources are the true customization point for Harper.
This is where the business logic of a Harper application really lives. There is a lot more to this API than these examples show. Ensure you fully review the [Resource API](./resource-api.md) documentation, and consider exploring the Learn guides for more information. + +## Exporting Resources as Endpoints + +Resources become HTTP/MQTT endpoints when they are exported. As the examples demonstrated if a Resource extends an existing table, make sure to not have conflicting exports between the schema and the JavaScript implementation. Alternatively, you can register resources programmatically using `server.resources.set()`. See [HTTP API](../http/api.md) for server extension documentation. + +## Pages in This Section + +| Page | Description | +| --------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| [Resource API](./resource-api.md) | Complete reference for instance methods, static methods, the Query object, RequestTarget, and response handling | +| [Query Optimization](./query-optimization.md) | How Harper executes queries and how to write performant conditions | diff --git a/reference_versioned_docs/version-v4/resources/query-optimization.md b/reference_versioned_docs/version-v4/resources/query-optimization.md new file mode 100644 index 00000000..d8148eaa --- /dev/null +++ b/reference_versioned_docs/version-v4/resources/query-optimization.md @@ -0,0 +1,219 @@ +--- +title: Query Optimization +--- + + + + +# Query Optimization + +Added in: v4.3.0 (query planning and execution improvements) + +Harper has powerful query functionality with excellent performance characteristics. Like any database, different queries can vary significantly in performance. Understanding how querying works helps you write queries that perform well as your dataset grows. 
+ +## Query Execution + +At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database. Harper supports indexed fields, and these indexes are used to speed up query execution. + +When conditions are specified in a query, Harper attempts to utilize indexes to optimize the speed of query execution. When a field is not indexed, Harper checks each potential record to determine if it matches the condition — this is a full table scan and degrades as data grows (`O(n)`). + +When a query has multiple conditions, Harper attempts to optimize their execution order. For intersecting conditions (the default `and` operator), Harper applies the most selective and performant condition first. If one condition can use an index and is more selective than another, it is used first to narrow the candidate set before filtering on the remaining conditions. + +The `search` method supports an `explain` flag that returns the query execution order Harper determined, useful for debugging and optimization: + +```javascript +const result = await MyTable.search({ + conditions: [...], + explain: true, +}); +``` + +For union queries (`or` operator), each condition is executed separately and the results are merged. + +## Conditions, Operators, and Indexing + +When a query is executed, conditions are evaluated against the database. Indexed fields significantly improve query performance. 
+ +### Index Performance Characteristics + +| Operator | Uses index | Notes | +| -------------------------------------------------------------------- | ------------------ | ------------------------------------------------------------------------ | +| `equals` | Yes | Fast lookup in sorted index | +| `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal` | Yes | Range scan in sorted index; narrower range = faster | +| `starts_with` | Yes | Prefix search in sorted index | +| `not_equal` | No | Full scan required (unless combined with selective indexed condition) | +| `contains` | No | Full scan required | +| `ends_with` | No | Full scan required | +| `!= null` | Yes (special case) | Can use indexes to find non-null records; only helpful for sparse fields | + +**Rule of thumb**: Use `equals`, range operators, and `starts_with` on indexed fields. Avoid `contains`, `ends_with`, and `not_equal` as the sole or first condition in large datasets. + +### Indexed vs. Non-Indexed Fields + +Indexed fields provide `O(log n)` lookup — fast even as the dataset grows. Non-indexed fields require `O(n)` full table scans. + +Trade-off: indexes speed up reads but add overhead to writes (insert/update/delete must update the index). This is usually worth it for frequently queried fields. + +### Primary Key vs. Secondary Index + +Querying on a **primary key** is faster than querying on a secondary (non-primary) index, because the primary key directly addresses the record without cross-referencing. + +Secondary indexes are still valuable for query conditions on other fields, but expect slightly more overhead than primary key lookups. + +### Cardinality + +More unique values (higher cardinality) = more efficient indexed lookups. For example, an index on a boolean field has very low cardinality (only two possible values) and is less efficient than an index on a `UUID` field. High-cardinality fields benefit most from indexing. 
+
+## Relationships and Joins
+
+Harper supports relationship-based queries that join data across tables. See [Schema documentation](TODO:reference_versioned_docs/version-v4/database/schema.md 'Database schema section with relationship directives') for how to define relationships.
+
+Join queries involve more lookups and naturally carry more overhead. The same indexing principles apply:
+
+- Conditions on joined table fields should use indexed columns for best performance.
+- If a relationship uses a foreign key, that foreign key should be indexed in both tables.
+- Higher cardinality foreign keys make joins more efficient.
+
+Example of an indexed foreign key that enables efficient join queries:
+
+```graphql
+type Product @table {
+  id: ID @primaryKey
+  brandId: ID @indexed # foreign key — index this
+  brand: Brand @relation(from: "brandId")
+}
+type Brand @table {
+  id: ID @primaryKey
+  name: String @indexed # indexed — enables efficient brand.name queries
+  products: [Product] @relation(to: "brandId")
+}
+```
+
+Added in: v4.3.0
+
+## Sorting
+
+Sorting can significantly impact query performance.
+
+- **Aligned sort and index**: If the sort attribute is the same indexed field used in the primary condition, Harper can use the index to retrieve results already in order — very fast.
+- **Unaligned sort**: If the sort is on a different field than the condition, or the sort field is not indexed, Harper must retrieve and sort all matching records. For large result sets this can be slow, and it also **defeats streaming** (see below).
+
+Best practice: sort on the same indexed field you are filtering on, or sort on a secondary indexed field with a narrow enough condition to produce a manageable result set.
+
+## Streaming
+
+Harper can stream query results — returning records as they are found rather than waiting for the entire query to complete. This improves time-to-first-byte for large queries and reduces peak memory usage.
+ +**Streaming is defeated** when: + +- A sort order is specified that is not aligned with the condition's index +- The full result set must be materialized to perform sorting + +When streaming is possible, results are returned as an `AsyncIterable`: + +```javascript +for await (const record of MyTable.search({ conditions: [...] })) { + // process each record as it arrives +} +``` + +Failing to iterate the `AsyncIterable` to completion keeps a read transaction open, degrading performance. Always ensure you either fully iterate or explicitly release the query. + +### Draining or Releasing a Query + +An open query holds an active read transaction. While that transaction is open, the underlying data pages and internal state for the query cannot be freed — they remain pinned in memory until the transaction closes. In long-running processes or under high concurrency, accumulating unreleased transactions degrades throughput and increases memory pressure. + +The transaction closes automatically once the `AsyncIterable` is fully iterated. If you need to stop early, you must explicitly signal that iteration is complete so Harper can release the transaction. + +**Breaking out of a `for await...of` loop** is the most natural way. The JavaScript runtime automatically calls `.return()` on the iterator when a `break`, `return`, or `throw` exits the loop: + +```javascript +for await (const record of MyTable.search({ conditions: [...] })) { + if (meetsStopCriteria(record)) { + break; // iterator.return() is called automatically — transaction is released + } + process(record); +} +``` + +**Calling `.return()` manually** is useful when you hold an iterator reference directly: + +```javascript +const iterator = MyTable.search({ conditions: [...] 
})[Symbol.asyncIterator](); +try { + const { value } = await iterator.next(); + process(value); +} finally { + await iterator.return(); // explicitly closes the iterator and releases the transaction +} +``` + +Avoid storing an iterator and abandoning it (e.g. never calling `.next()` again without calling `.return()`), as the transaction will remain open until the iterator is garbage collected — which is non-deterministic. + +## Practical Guidance + +### Index fields you query on frequently + +```graphql +type Product @table { + id: ID @primaryKey + name: String @indexed # queried frequently + category: String @indexed # queried frequently + description: String # not indexed (rarely in conditions) +} +``` + +### Use `explain` to diagnose slow queries + +```javascript +const result = await Product.search({ + conditions: [ + { attribute: 'category', value: 'electronics' }, + { attribute: 'price', comparator: 'less_than', value: 100 }, + ], + explain: true, +}); +// result shows the actual execution order Harper selected +``` + +### Prefer selective conditions first + +When Harper cannot auto-reorder (e.g. 
with `enforceExecutionOrder`), put the most selective condition first: + +```javascript +// Better: indexed, selective condition first +Product.search({ + conditions: [ + { attribute: 'sku', value: 'ABC-001' }, // exact match on indexed unique field + { attribute: 'active', value: true }, // low cardinality filter + ], +}); +``` + +### Use `limit` and `offset` for pagination + +```javascript +Product.search({ + conditions: [...], + sort: { attribute: 'createdAt', descending: true }, + limit: 20, + offset: page * 20, +}); +``` + +### Avoid wide range queries on non-indexed fields + +```javascript +// Slow: non-indexed field with range condition +Product.search({ + conditions: [{ attribute: 'description', comparator: 'contains', value: 'sale' }], +}); + +// Better: use an indexed field condition to narrow first +Product.search({ + conditions: [ + { attribute: 'category', value: 'clothing' }, // indexed — narrows to subset + { attribute: 'description', comparator: 'contains', value: 'sale' }, // non-indexed, applied to smaller set + ], +}); +``` diff --git a/reference_versioned_docs/version-v4/resources/resource-api.md b/reference_versioned_docs/version-v4/resources/resource-api.md new file mode 100644 index 00000000..3916261f --- /dev/null +++ b/reference_versioned_docs/version-v4/resources/resource-api.md @@ -0,0 +1,633 @@ +--- +title: Resource API +--- + + + + + + + + + + +# Resource API + +Added in: v4.2.0 + +The Resource API provides a unified JavaScript interface for accessing, querying, modifying, and subscribing to data resources in Harper. Tables extend the base `Resource` class, and all resource interactions — whether from HTTP requests, MQTT messages, or application code — flow through this interface. 
+ +## API Versions + +The Resource API has two behavioral modes selected by the `loadAsInstance` static property: + +| Version | `loadAsInstance` | Status | +| ------------ | ---------------- | ------------------------------------- | +| V2 (current) | `false` | Recommended for new code | +| V1 (legacy) | `true` | Preserved for backwards compatibility | + +This page documents V2 behavior (`loadAsInstance = false`). For V1 (legacy instance binding) behavior and migration examples, see [Legacy Instance Binding](#legacy-instance-binding-v1). + +### V2 Behavioral Differences from V1 + +Changed in: v4.6.0 (Resource API upgrades that formalized V2) + +When `loadAsInstance = false`: + +- Instance methods receive a `RequestTarget` as their first argument; no record is preloaded onto `this`. +- The `get` method returns the record as a plain (frozen) object rather than a Resource instance. +- `put`, `post`, and `patch` receive `(target, data)` — **arguments are reversed from V1**. +- Authorization is handled via `target.checkPermission` rather than `allowRead`/`allowUpdate`/etc. methods. Set it to `false` to bypass permission checks entirely (e.g. for a public read endpoint), or leave it at its default to require superuser access for write operations: + + ```javascript + // Public read — no auth required + get(target) { + target.checkPermission = false; + return super.get(target); + } + + // POST is superuser-only by default — no change needed + post(target, data) { + return super.post(target, data); + } + ``` + + `checkPermission` can also be set to a non-boolean value to delegate to role-based or schema-defined permissions — see the authorization documentation for details. + +- The `update` method returns an `Updatable` object instead of a Resource instance. +- Context is tracked automatically via async context tracking; set `static explicitContext = true` to disable (improves performance). +- `getId()` is not used and returns `undefined`. 
+ +--- + +## Resource Instance Methods + +These methods are defined on a Resource class and called when requests are routed to the resource. Override them to define custom behavior. + +### `get(target: RequestTarget): Promise | AsyncIterable` + +Called for HTTP GET requests. When the request targets a single record (e.g. `/Table/some-id`), returns a single record object. When the request targets a collection (e.g. `/Table/?name=value`), the `target.isCollection` property is `true` and the default behavior calls `search()`, returning an `AsyncIterable`. + +```javascript +class MyResource extends Resource { + static loadAsInstance = false; + + get(target) { + const id = target.id; // primary key from URL path + const param = target.get('param1'); // query string param + const path = target.pathname; // path relative to resource + return super.get(target); // default: return the record + } +} +``` + +The default `super.get(target)` returns a `RecordObject` — a frozen plain object with the record's properties plus `getUpdatedTime()` and `getExpiresAt()`. + +:::caution Common gotchas + +- **`/Table` vs `/Table/`** — `GET /Table` returns metadata about the table resource itself. `GET /Table/` (trailing slash) targets the collection and invokes `get()` as a collection request. These are distinct endpoints. +- **Case sensitivity** — The URL path must match the exact casing of the exported resource or table name. `/Table/` works; `/table/` returns a 404. + +::: + +### `search(query: RequestTarget): AsyncIterable` + +Performs a query on the resource or table. Called by `get()` on collection requests. Can be overridden to define custom query behavior. The default implementation on tables queries by the `conditions`, `limit`, `offset`, `select`, and `sort` properties parsed from the URL. + +### `put(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP PUT requests. Writes the full record to the table, creating or replacing the existing record. 
+ +```javascript +put(target, data) { + // validate or transform before saving + super.put(target, { ...data, status: data.status ?? 'active' }); +} +``` + +### `patch(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP PATCH requests. Merges `data` into the existing record, preserving any properties not included in `data`. + +Added in: v4.3.0 (CRDT support for individual property updates via PATCH) + +### `post(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP POST requests. Default behavior creates a new record. Override to implement custom actions. + +### `delete(target: RequestTarget | Id): void | Response` + +Called for HTTP DELETE requests. Default behavior deletes the record identified by `target`. + +### `update(target: RequestTarget, updates?: object): Updatable` + +Returns an `Updatable` instance providing mutable property access to a record. Any property changes on the `Updatable` are written to the database when the transaction commits. + +```javascript +post(target, data) { + const record = this.update(target.id); + record.quantity = record.quantity - 1; + // saved automatically on transaction commit +} +``` + +#### `Updatable` class + +The `Updatable` class provides direct property access plus: + +##### `addTo(property: string, value: number)` + +Adds `value` to `property` using CRDT incrementation — safe for concurrent updates across threads and nodes. + +Added in: v4.3.0 + +```javascript +post(target, data) { + const record = this.update(target.id); + record.addTo('quantity', -1); // decrement safely across nodes +} +``` + +##### `subtractFrom(property: string, value: number)` + +Subtracts `value` from `property` using CRDT incrementation. + +##### `set(property: string, value: any): void` + +Sets a property to `value`. Equivalent to direct property assignment (`record.property = value`), but useful when the property name is dynamic. 
+ +```javascript +const record = this.update(target.id); +record.set('status', 'active'); +``` + +##### `getProperty(property: string): any` + +Returns the current value of `property` from the record. Useful when the property name is dynamic or when you want an explicit read rather than direct property access. + +```javascript +const record = this.update(target.id); +const current = record.getProperty('status'); +``` + +##### `getUpdatedTime(): number` + +Returns the last updated time as milliseconds since epoch. + +##### `getExpiresAt(): number` + +Returns the expiration time, if one is set. + +### `publish(target: RequestTarget, message: object): void | Response` + +Called for MQTT publish commands. Default behavior records the message and notifies subscribers without changing the record's stored data. + +### `subscribe(subscriptionRequest?: SubscriptionRequest): Promise` + +Called for MQTT subscribe commands. Returns a `Subscription` — an `AsyncIterable` of messages/changes. + +#### `SubscriptionRequest` options + +All properties are optional: + +| Property | Description | +| -------------------- | ---------------------------------------------------------------------------------------------- | +| `includeDescendants` | Include all updates with an id prefixed by the subscribed id (e.g. `sub/*`) | +| `startTime` | Start from a past time (catch-up of historical messages). Cannot be used with `previousCount`. | +| `previousCount` | Return the last N updates/messages. Cannot be used with `startTime`. | +| `omitCurrent` | Do not send the current/retained record as the first update. | + +### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` + +Called for WebSocket and Server-Sent Events connections. `incomingMessages` is provided for WebSocket connections (not SSE). Returns an `AsyncIterable` of messages to send to the client. 
+ +### `invalidate(target: RequestTarget)` + +Marks the specified record as invalid in a caching table, so it will be reloaded from the source on next access. + +### `allowStaleWhileRevalidate(entry, id): boolean` + +For caching tables: return `true` to serve the stale entry while revalidation happens concurrently; `false` to wait for the fresh value. + +Entry properties: + +- `version` — Timestamp/version from the source +- `localTime` — When the resource was last refreshed locally +- `expiresAt` — When the entry became stale +- `value` — The stale record value + +### `getUpdatedTime(): number` + +Returns the last updated time of the resource (milliseconds since epoch). + +### `wasLoadedFromSource(): boolean` + +For caching tables, indicates that this request was a cache miss and the data was loaded from the source resource. + +### `getContext(): Context` + +Returns the current context, which includes: + +- `user` — User object with username, role, and authorization information +- `transaction` — The current transaction + +When triggered by HTTP, the context is the `Request` object with these additional properties: + +- `url` — Full local path including query string +- `method` — HTTP method +- `headers` — Request headers (access with `context.headers.get(name)`) +- `responseHeaders` — Response headers (set with `context.responseHeaders.set(name, value)`) +- `pathname` — Path without query string +- `host` — Host from the `Host` header +- `ip` — Client IP address +- `body` — Raw Node.js `Readable` stream (if a request body exists) +- `data` — Promise resolving to the deserialized request body +- `lastModified` — Controls the `ETag`/`Last-Modified` response header +- `requestContext` — (For source resources only) Context of the upstream resource making the data request + +### `operation(operationObject: object, authorize?: boolean): Promise` + +Executes a Harper operations API call using this table as the target. 
Set `authorize` to `true` to enforce current-user authorization. + +--- + +## Resource Static Methods + +Static methods are the preferred way to interact with tables and resources from application code. They handle transaction setup, access checks, and request parsing automatically. + +All instance methods have static equivalents that accept an `id` or `RequestTarget` as the first argument: + +### `get(target: RequestTarget | Id | Query, context?: Resource | Context)` + +Retrieve a record by primary key, or query for records. + +```javascript +// By primary key +const product = await Product.get(34); + +// By query object +const product = await Product.get({ id: 34, select: ['name', 'price'] }); + +// Iterate a collection query +for await (const record of Product.get({ conditions: [{ attribute: 'inStock', value: true }] })) { + // ... +} +``` + +### `put(target: RequestTarget | Id, record: object, context?): Promise` + +### `put(record: object, context?): Promise` + +Save a record (create or replace). The second form reads the primary key from the `record` object. + +### `create(record: object, context?): Promise` + +Create a new record with an auto-generated primary key. Returns the created record. Do not include a primary key in the `record` argument. + +Added in: v4.2.0 + +### `patch(target: RequestTarget | Id, updates: object, context?): Promise` + +Apply partial updates to an existing record. + +### `post(target: RequestTarget | Id, data: object, context?): Promise` + +Call the `post` instance method. Defaults to creating a new record. + +### `delete(target: RequestTarget | Id, context?): Promise` + +Delete a record. + +### `publish(target: RequestTarget | Id, message: object, context?): Promise` + +Publish a message to a record/topic. + +### `subscribe(subscriptionRequest?, context?): Promise` + +Subscribe to record changes or messages. + +### `search(query: RequestTarget | Query, context?): AsyncIterable` + +Query the table. 
See [Query Object](#query-object) below for available query options. + +### `setComputedAttribute(name: string, computeFunction: (record) => any)` + +Define the compute function for a `@computed` schema attribute. + +Added in: v4.4.0 + +```javascript +MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${record.lastName}`); +``` + +### `getRecordCount({ exactCount?: boolean }): Promise<{ recordCount: number, estimatedRange?: [number, number] }>` + +Returns the number of records in the table. By default returns an approximate (fast) count. Pass `{ exactCount: true }` for a precise count. + +Added in: v4.5.0 + +### `sourcedFrom(Resource, options?)` + +Configure a table to use another resource as its data source (caching behavior). When a record is not found locally, it is fetched from the source and cached. Writes are delegated to the source. + +Options: + +- `expiration` — Default TTL in seconds +- `eviction` — Eviction time in seconds +- `scanInterval` — Period for scanning expired records + +### `parsePath(path, context, query)` + +Called by static methods when processing a URL path. Can be overridden to preserve the path directly as the primary key: + +```javascript +static parsePath(path) { + return path; // use full path as id, no parsing +} +``` + +### `directURLMapping` + +Set this static property to `true` to map the full URL (including query string) as the primary key, bypassing query parsing. + +Added in: v4.5.0 (documented in improved URL path parsing) + +```javascript +export class MyTable extends tables.MyTable { + static directURLMapping = true; +} +// GET /MyTable/test?foo=bar → primary key is 'test?foo=bar' +``` + +### `primaryKey` + +The name of the primary key attribute for the table. + +```javascript +const record = await Table.get(34); +record[Table.primaryKey]; // → 34 +``` + +### `isCollection(resource): boolean` + +Returns `true` if the resource instance represents a collection (query result) rather than a single record. 
+ +--- + +## Query Object + +The `Query` object is accepted by `search()` and the static `get()` method. + +### `conditions` + +Array of condition objects to filter records. Each condition: + +| Property | Description | +| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `attribute` | Property name, or an array for chained/joined properties (e.g. `['brand', 'name']`) | +| `value` | The value to match | +| `comparator` | `equals` (default), `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `starts_with`, `contains`, `ends_with`, `between`, `not_equal` | +| `conditions` | Nested conditions array | +| `operator` | `and` (default) or `or` for the nested `conditions` | + +Example with nested conditions: + +```javascript +Product.search({ + conditions: [ + { attribute: 'price', comparator: 'less_than', value: 100 }, + { + operator: 'or', + conditions: [ + { attribute: 'rating', comparator: 'greater_than', value: 4 }, + { attribute: 'featured', value: true }, + ], + }, + ], +}); +``` + +**Chained attribute references** (for relationships/joins): Use an array to traverse relationship properties: + +```javascript +Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); +``` + +Added in: v4.3.0 + +### `operator` + +Top-level `and` (default) or `or` for the `conditions` array. + +### `limit` + +Maximum number of records to return. + +### `offset` + +Number of records to skip (for pagination). + +### `select` + +Properties to include in each returned record. 
Can be: + +- Array of property names: `['name', 'price']` +- Nested select for related records: `[{ name: 'brand', select: ['id', 'name'] }]` +- String to return a single property per record: `'id'` + +Special properties: + +- `$id` — Returns the primary key regardless of its name +- `$updatedtime` — Returns the last-updated timestamp + +### `sort` + +Sort order object: + +| Property | Description | +| ------------ | ---------------------------------------------------------- | +| `attribute` | Property name (or array for chained relationship property) | +| `descending` | Sort descending if `true` (default: `false`) | +| `next` | Secondary sort to resolve ties (same structure) | + +### `explain` + +If `true`, returns conditions reordered as Harper will execute them (for debugging and optimization). + +### `enforceExecutionOrder` + +If `true`, forces conditions to execute in the order supplied, disabling Harper's automatic re-ordering optimization. + +--- + +## RequestTarget + +`RequestTarget` represents a URL path mapped to a resource. It is a subclass of `URLSearchParams`. + +Properties: + +- `pathname` — Path relative to the resource, without query string +- `search` — The query/search string portion of the URL +- `id` — Primary key derived from the path +- `isCollection` — `true` when the request targets a collection +- `checkPermission` — Set to indicate authorization should be performed; has `action`, `resource`, and `user` sub-properties + +Standard `URLSearchParams` methods are available: + +- `get(name)`, `getAll(name)`, `set(name, value)`, `append(name, value)`, `delete(name)`, `has(name)` +- Iterable: `for (const [name, value] of target) { ... 
}` + +When a URL uses Harper's extended query syntax, these are parsed onto the target: + +- `conditions`, `limit`, `offset`, `sort`, `select` + +--- + +## RecordObject + +The `get()` method returns a `RecordObject` — a frozen plain object with all record properties, plus: + +- `getUpdatedTime(): number` — Last updated time (milliseconds since epoch) +- `getExpiresAt(): number` — Expiration time, if set + +--- + +## Response Object + +Resource methods can return: + +1. **Plain data** — serialized using content negotiation +2. **`Response`-like object** with `status`, `headers`, and `data` or `body`: + +```javascript +// Redirect +return { status: 302, headers: { Location: '/new-location' } }; + +// Custom header with data +return { status: 200, headers: { 'X-Custom-Header': 'value' }, data: { message: 'ok' } }; +``` + +`body` must be a string, `Buffer`, Node.js stream, or `ReadableStream`. `data` is an object that will be serialized. + +Added in: v4.4.0 + +### Throwing Errors + +Uncaught errors are caught by the protocol handler. For REST, they produce error responses. Set `error.statusCode` to control the HTTP status: + +```javascript +if (!authorized) { + const error = new Error('Forbidden'); + error.statusCode = 403; + throw error; +} +``` + +--- + +## Context and Transactions + +Whenever you call other resources from within a resource method, pass `this` as the context argument to share the transaction and ensure atomicity: + +```javascript +export class BlogPost extends tables.BlogPost { + static loadAsInstance = false; + + post(target, data) { + // both writes share the same transaction + tables.Comment.put(data, this); + const post = this.update(target.id); + post.commentCount = (post.commentCount ?? 0) + 1; + } +} +``` + +See [Global APIs — transaction](./global-apis.md#transaction) for explicitly starting transactions outside of request handlers. 
+ +--- + +## Legacy Instance Binding (V1) + +This documents the legacy `loadAsInstance = true` (or default pre-V2) behavior. The V2 API is recommended for all new code. + +When `loadAsInstance` is not `false` (or is explicitly `true`): + +- `this` is pre-bound to the matching record when instance methods are called. +- `this.getId()` returns the current record's primary key. +- Instance properties map directly to the record's fields. +- `get(query)` and `put(data, query)` have arguments in the older order (no `target` first). +- `allowRead()`, `allowUpdate()`, `allowCreate()`, `allowDelete()` methods are used for authorization. + +```javascript +export class MyExternalData extends Resource { + static loadAsInstance = true; + + async get() { + const response = await this.fetch(this.id); + return response; + } + + put(data) { + // write to external source + } + + delete() { + // delete from external source + } +} + +tables.MyCache.sourcedFrom(MyExternalData); +``` + +### Migration from V1 to V2 + +Updated `get`: + +```javascript +// V1 +async get(query) { + let id = this.getId(); + this.newProperty = 'value'; + return super.get(query); +} + +// V2 +static loadAsInstance = false; +async get(target) { + let id = target.id; + let record = await super.get(target); + return { ...record, newProperty: 'value' }; // record is frozen; spread to add properties +} +``` + +Updated authorization: + +```javascript +// V1 +allowRead(user) { + return !!user; +} + +// V2 +static loadAsInstance = false; +async get(target) { + if (!this.getContext().user) { + const error = new Error('Unauthorized'); + error.statusCode = 401; + throw error; + } + target.checkPermission = false; + return super.get(target); +} +``` + +Updated `post` (note reversed argument order): + +```javascript +// V1 +async post(data, query) { ... } + +// V2 +static loadAsInstance = false; +async post(target, data) { ... 
} // target is first +``` diff --git a/reference_versioned_docs/version-v4/security/api.md b/reference_versioned_docs/version-v4/security/api.md new file mode 100644 index 00000000..67c359f6 --- /dev/null +++ b/reference_versioned_docs/version-v4/security/api.md @@ -0,0 +1,23 @@ +--- +title: Security API +--- + + + + +# Security API + +Harper exposes security-related globals accessible in all component JavaScript modules without needing to import them. + +--- + +## `auth(username, password?): Promise` + +Returns the user object for the given username. If `password` is provided, it is verified before returning the user (throws on incorrect password). + +```javascript +const user = await auth('admin', 'secret'); +// user.role, user.username, etc. +``` + +This is useful for implementing custom authentication flows or verifying credentials in component code. For HTTP-level authentication configuration, see [Security Overview](./overview.md). diff --git a/reference_versioned_docs/version-v4/security/overview.md b/reference_versioned_docs/version-v4/security/overview.md index a47ed4a6..c2ddd569 100644 --- a/reference_versioned_docs/version-v4/security/overview.md +++ b/reference_versioned_docs/version-v4/security/overview.md @@ -41,6 +41,10 @@ Harper supports three authentication methods: - For Operations API configuration see [Operations API / Configuration / TLS](TODO:reference_versioned_docs/version-v4/configuration/operations.md#tls) - [Users and Roles](../users-and-roles/overview.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. +## API + +- [Security API](./api.md) — JavaScript globals for security operations (e.g. `auth()`). 
+ ## Default Behavior Out of the box, Harper: diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 159180ce..8bdf3811 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -268,6 +268,11 @@ "type": "doc", "id": "security/certificate-verification", "label": "Certificate Verification" + }, + { + "type": "doc", + "id": "security/api", + "label": "API" } ] }, @@ -347,6 +352,29 @@ } ] }, + { + "type": "category", + "label": "Resources", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "resources/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "resources/resource-api", + "label": "Resource API" + }, + { + "type": "doc", + "id": "resources/query-optimization", + "label": "Query Optimization" + } + ] + }, { "type": "category", "label": "Legacy", From 273d8395c6b8c8fdefaeda054f2ce0f294cd4b91 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 08:44:53 -0600 Subject: [PATCH 26/51] follow up edits from reviews --- .../version-v4/database/api.md | 4 +- .../version-v4/database/data-loader.md | 8 +-- .../version-v4/database/schema.md | 54 +++++++++---------- .../version-v4/database/transaction.md | 2 +- .../version-v4/resources/overview.md | 2 +- .../resources/query-optimization.md | 8 +-- .../version-v4/resources/resource-api.md | 4 +- .../version-v4/rest/querying.md | 10 ++-- v4-docs-implementation-plan.md | 4 +- v4-docs-migration-map.md | 26 ++++----- 10 files changed, 62 insertions(+), 60 deletions(-) diff --git a/reference_versioned_docs/version-v4/database/api.md b/reference_versioned_docs/version-v4/database/api.md index b3b9408a..93162ed6 100644 --- a/reference_versioned_docs/version-v4/database/api.md +++ b/reference_versioned_docs/version-v4/database/api.md @@ -17,7 +17,7 @@ Harper exposes a set of global variables and functions 
that JavaScript code (in ```graphql # schema.graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey name: String price: Float } @@ -76,7 +76,7 @@ To define tables in a non-default database, use the `database` argument on the ` ```graphql type Events @table(database: "analytics") { - id: ID @primaryKey + id: Long @primaryKey eventType: String @indexed } ``` diff --git a/reference_versioned_docs/version-v4/database/data-loader.md b/reference_versioned_docs/version-v4/database/data-loader.md index 69745753..521f6153 100644 --- a/reference_versioned_docs/version-v4/database/data-loader.md +++ b/reference_versioned_docs/version-v4/database/data-loader.md @@ -88,7 +88,7 @@ dataLoader: ## Loading Behavior -The Data Loader runs on every full system start and every component deployment — this includes fresh installs, restarts of the Harper process, and redeployments of the component. It does **not** re-run on individual thread restarts within a running Harper process. +The Data Loader runs on every full system start and every component deployment — this includes fresh installs, restarts of the Harper process or threads, and redeployments of the component. Because the Data Loader runs on every startup and deployment, change detection is central to how it works safely. On each run: @@ -164,15 +164,15 @@ rest: true ```graphql type Country @table(database: "myapp") @export { - id: ID @primaryKey # ISO 3166-1 alpha-2, e.g. "US" + id: String @primaryKey # ISO 3166-1 alpha-2, e.g. "US" name: String @indexed region: String @indexed } type Region @table(database: "myapp") @export { - id: ID @primaryKey # ISO 3166-2, e.g. "US-CA" + id: String @primaryKey # ISO 3166-2, e.g. 
"US-CA" name: String @indexed - countryId: ID @indexed + countryId: String @indexed country: Country @relationship(from: countryId) } ``` diff --git a/reference_versioned_docs/version-v4/database/schema.md b/reference_versioned_docs/version-v4/database/schema.md index 0bd57adb..93cb5b7f 100644 --- a/reference_versioned_docs/version-v4/database/schema.md +++ b/reference_versioned_docs/version-v4/database/schema.md @@ -34,14 +34,14 @@ A minimal example: ```graphql type Dog @table { - id: ID @primaryKey + id: Long @primaryKey name: String breed: String age: Int } type Breed @table { - id: ID @primaryKey + id: Long @primaryKey name: String @indexed } ``` @@ -67,7 +67,7 @@ Marks a GraphQL type as a Harper database table. The type name becomes the table ```graphql type MyTable @table { - id: ID @primaryKey + id: Long @primaryKey } ``` @@ -85,23 +85,23 @@ Optional arguments: ```graphql # Override table name type Product @table(table: "products") { - id: ID @primaryKey + id: Long @primaryKey } # Place in a specific database type Order @table(database: "commerce") { - id: ID @primaryKey + id: Long @primaryKey } # Auto-expire records after 1 hour (e.g., a session cache) type Session @table(expiration: 3600) { - id: ID @primaryKey + id: Long @primaryKey userId: String } # Enable audit log for this table explicitly type AuditedRecord @table(audit: true) { - id: ID @primaryKey + id: Long @primaryKey value: String } @@ -120,7 +120,7 @@ Exposes the table as an externally accessible resource endpoint, available via R ```graphql type MyTable @table @export(name: "my-table") { - id: ID @primaryKey + id: Long @primaryKey } ``` @@ -132,7 +132,7 @@ Prevents records from including any properties beyond those explicitly declared ```graphql type StrictRecord @table @sealed { - id: ID @primaryKey + id: Long @primaryKey name: String } ``` @@ -169,7 +169,7 @@ Creates a secondary index on the attribute for fast querying. 
Required for filte ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey category: String @indexed price: Float @indexed } @@ -185,7 +185,7 @@ Automatically assigns a creation timestamp (Unix epoch milliseconds) to the attr ```graphql type Event @table { - id: ID @primaryKey + id: Long @primaryKey createdAt: Long @createdTime } ``` @@ -196,7 +196,7 @@ Automatically assigns a timestamp (Unix epoch milliseconds) each time the record ```graphql type Event @table { - id: ID @primaryKey + id: Long @primaryKey updatedAt: Long @updatedTime } ``` @@ -213,14 +213,14 @@ The foreign key is in this table, referencing the primary key of the target tabl ```graphql type RealityShow @table @export { - id: ID @primaryKey - networkId: ID @indexed # foreign key + id: Long @primaryKey + networkId: Long @indexed # foreign key network: Network @relationship(from: networkId) # many-to-one title: String @indexed } type Network @table @export { - id: ID @primaryKey + id: Long @primaryKey name: String @indexed # e.g. "Bravo", "Peacock", "Netflix" } ``` @@ -235,8 +235,8 @@ If the foreign key is an array, this establishes a many-to-many relationship (e. ```graphql type RealityShow @table @export { - id: ID @primaryKey - networkIds: [ID] @indexed + id: Long @primaryKey + networkIds: [Long] @indexed networks: [Network] @relationship(from: networkIds) } ``` @@ -247,7 +247,7 @@ The foreign key is in the target table, referencing the primary key of this tabl ```graphql type Network @table @export { - id: ID @primaryKey + id: Long @primaryKey name: String @indexed # e.g. 
"Bravo", "Peacock", "Netflix" shows: [RealityShow] @relationship(to: networkId) # one-to-many # shows like "Real Housewives of Atlanta", "The Traitors", "Vanderpump Rules" @@ -260,15 +260,15 @@ Both `from` and `to` can be specified together to define a relationship where ne ```graphql type OrderItem @table @export { - id: ID @primaryKey - orderId: ID @indexed - productSku: ID @indexed + id: Long @primaryKey + orderId: Long @indexed + productSku: Long @indexed product: Product @relationship(from: productSku, to: sku) # join on sku, not primary key } type Product @table @export { - id: ID @primaryKey - sku: ID @indexed + id: Long @primaryKey + sku: Long @indexed name: String } ``` @@ -283,7 +283,7 @@ The `@computed` directive marks a field as derived from other fields at query ti ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey price: Float taxRate: Float totalPrice: Float @computed(from: "price + (price * taxRate)") @@ -296,7 +296,7 @@ Computed properties can also be defined in JavaScript for complex logic: ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey totalPrice: Float @computed } ``` @@ -315,7 +315,7 @@ Computed properties can be indexed with `@indexed`, enabling custom indexing str ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey tags: String tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed } @@ -325,7 +325,7 @@ When using a JavaScript function for an indexed computed property, use the `vers ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey totalPrice: Float @computed(version: 1) @indexed } ``` diff --git a/reference_versioned_docs/version-v4/database/transaction.md b/reference_versioned_docs/version-v4/database/transaction.md index 65cbc697..d2a58968 100644 --- a/reference_versioned_docs/version-v4/database/transaction.md +++ b/reference_versioned_docs/version-v4/database/transaction.md @@ -139,7 +139,7 @@ You can enable or 
disable the audit log for individual tables using the `@table` ```graphql type Dog @table(audit: true) { - id: ID @primaryKey + id: Long @primaryKey name: String } ``` diff --git a/reference_versioned_docs/version-v4/resources/overview.md b/reference_versioned_docs/version-v4/resources/overview.md index 9dbad99f..c8fa7e41 100644 --- a/reference_versioned_docs/version-v4/resources/overview.md +++ b/reference_versioned_docs/version-v4/resources/overview.md @@ -43,7 +43,7 @@ Starting with a table definition in a `schema.graphql`: ```graphql # Omit the `@export` directive type MyTable @table { - id: ID @primaryKey + id: Long @primaryKey # ... } ``` diff --git a/reference_versioned_docs/version-v4/resources/query-optimization.md b/reference_versioned_docs/version-v4/resources/query-optimization.md index d8148eaa..36a00675 100644 --- a/reference_versioned_docs/version-v4/resources/query-optimization.md +++ b/reference_versioned_docs/version-v4/resources/query-optimization.md @@ -78,12 +78,12 @@ Example of an indexed foreign key that enables efficient join queries: ```graphql type Product @table { - id: ID @primaryKey - brandId: ID @indexed # foreign key — index this + id: Long @primaryKey + brandId: Long @indexed # foreign key — index this brand: Related @relation(from: "brandId") } type Brand @table { - id: ID @primaryKey + id: Long @primaryKey name: String @indexed # indexed — enables efficient brand.name queries products: Product @relation(to: "brandId") } @@ -156,7 +156,7 @@ Avoid storing an iterator and abandoning it (e.g. 
never calling `.next()` again ```graphql type Product @table { - id: ID @primaryKey + id: Long @primaryKey name: String @indexed # queried frequently category: String @indexed # queried frequently description: String # not indexed (rarely in conditions) diff --git a/reference_versioned_docs/version-v4/resources/resource-api.md b/reference_versioned_docs/version-v4/resources/resource-api.md index 3916261f..47fdca7b 100644 --- a/reference_versioned_docs/version-v4/resources/resource-api.md +++ b/reference_versioned_docs/version-v4/resources/resource-api.md @@ -24,7 +24,9 @@ The Resource API has two behavioral modes selected by the `loadAsInstance` stati | Version | `loadAsInstance` | Status | | ------------ | ---------------- | ------------------------------------- | | V2 (current) | `false` | Recommended for new code | -| V1 (legacy) | `true` | Preserved for backwards compatibility | +| V1 (legacy) | `true` (default) | Preserved for backwards compatibility | + +The default value of `loadAsInstance` is `true` (V1 behavior). To opt in to V2, you must explicitly set `static loadAsInstance = false` on your resource class. This page documents V2 behavior (`loadAsInstance = false`). For V1 (legacy instance binding) behavior and migration examples, see [Legacy Instance Binding](#legacy-instance-binding-v1). 
diff --git a/reference_versioned_docs/version-v4/rest/querying.md b/reference_versioned_docs/version-v4/rest/querying.md index 49662cbc..37ad0b4d 100644 --- a/reference_versioned_docs/version-v4/rest/querying.md +++ b/reference_versioned_docs/version-v4/rest/querying.md @@ -185,13 +185,13 @@ Harper supports querying across related tables through dot-syntax chained attrib ```graphql type Product @table @export { - id: ID @primaryKey + id: Long @primaryKey name: String - brandId: ID @indexed + brandId: Long @indexed brand: Brand @relation(from: "brandId") } type Brand @table @export { - id: ID @primaryKey + id: Long @primaryKey name: String products: [Product] @relation(to: "brandId") } @@ -222,9 +222,9 @@ Many-to-many relationships can be modeled with an array of foreign key values, w ```graphql type Product @table @export { - id: ID @primaryKey + id: Long @primaryKey name: String - resellerIds: [ID] @indexed + resellerIds: [Long] @indexed resellers: [Reseller] @relation(from: "resellerId") } ``` diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 8698c3cb..704ec957 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -292,7 +292,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `websockets.md` - `server-sent-events.md` -4. **Database** (`reference_versioned_docs/version-v4/database/`) +4. **Database** (`reference_versioned_docs/version-v4/database/`) — **Complete** - `overview.md` - `schema.md` - `api.md` _(JS globals: `tables`, `databases`, `transaction()`, `createBlob()`)_ @@ -303,7 +303,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `compaction.md` - `transaction.md` -5. **Resources** (`reference_versioned_docs/version-v4/resources/`) +5. 
**Resources** (`reference_versioned_docs/version-v4/resources/`) — **Complete** - `overview.md` - `resource-api.md` - `global-apis.md` diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 1757f1dd..ef85907b 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -273,7 +273,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Additional Sources**: - `versioned_docs/version-4.7/reference/architecture.md` - Current `reference/architecture.md` -- **Status**: Not Started +- **Status**: Complete - **Notes**: Should explain Resources + Schema + Auto-REST relationship - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Database structure changes (single file per database) @@ -297,7 +297,7 @@ Broken out from the security section during migration — RBAC warrants its own - Computed properties: v4.4.0 - Blob storage: v4.5.0 - Vector indexing: v4.6.0 -- **Status**: Not Started +- **Status**: Complete - **Notes**: Large consolidation - may want to keep blobs/vectors separate - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Configurable schemas with GraphQL syntax @@ -311,7 +311,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/developers/applications/data-loader.md` - **Additional Sources**: Current `reference/data-loader.md` - **Version Annotations**: Added in v4.5.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Data loader introduced @@ -319,7 +319,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/storage-algorithm.md` - **Additional Sources**: Current `reference/storage-algorithm.md` -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Storage performance improvements, 
compression by default @@ -330,13 +330,13 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.7/developers/operations-api/jobs.md` - `versioned_docs/version-4.7/developers/operations-api/bulk-operations.md` - **Merge Required**: Yes - jobs/bulk operations content scattered -- **Status**: Not Started +- **Status**: Complete ### `reference/database/system-tables.md` - **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` - **Additional Sources**: Current `reference/analytics.md` -- **Status**: Not Started +- **Status**: Complete - **Notes**: System tables for analytics and other features ### `reference/database/compaction.md` @@ -344,7 +344,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/administration/compact.md` - **Additional Sources**: Current `reference/compact.md` - **Version Annotations**: Added in v4.3.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Compact database functionality @@ -356,7 +356,7 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.7/reference/blob.md` (createBlob() function) - **Merge Required**: Yes — combines tables/databases globals, transaction(), and createBlob() into one page - **Version Annotations**: Blob type added in v4.5.0 -- **Status**: In Progress +- **Status**: Complete - **Notes**: Covers the JS globals most relevant to database interaction. Server globals (server.http, server.ws, etc.) belong in the HTTP/Resources sections. 
- **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Blob storage and createBlob() added @@ -370,7 +370,7 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.1/audit-logging.md` - **Merge Required**: Yes - combines audit and transaction logging - **Version Annotations**: Transaction logging available since v4.1.0, audit logging since v4.1.0 -- **Status**: Not Started +- **Status**: Complete - **Notes**: Consolidated from separate audit and transaction logging pages - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Balanced audit log cleanup @@ -384,7 +384,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` - **Additional Sources**: Current `reference/resources/` folder -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced @@ -401,7 +401,7 @@ Broken out from the security section during migration — RBAC warrants its own - Basic Resource API: v4.2.0 - loadAsInstance changes: v4.4.0+ - Response objects: v4.4.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CRDT support @@ -418,7 +418,7 @@ Broken out from the security section during migration — RBAC warrants its own - Current `reference/transactions.md` - **Merge Required**: Yes - consolidate global APIs (tables, databases, transactions, etc.) 
- **Version Annotations**: Various APIs added across versions -- **Status**: Not Started +- **Status**: Complete - **Notes**: Should reference out to http/api.md for `server` global - **Release Notes**: - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Table.getRecordCount() @@ -427,7 +427,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/resources/query-optimization.md` - **Additional Sources**: Current `reference/resources/query-optimization.md` -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Query optimizations From 7359fcbb9c1b1d5d24ef0b65f0f1b1be8d7e1963 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 09:22:19 -0600 Subject: [PATCH 27/51] Components Section Migration (#460) * docs: migrate Components section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * docs: add JavaScript Environment page to Components section Co-Authored-By: Claude Sonnet 4.6 * manual review * format --------- Co-authored-by: Claude Sonnet 4.6 --- .../components-link-placeholders.md | 65 +++ .../version-v4/components/applications.md | 336 ++++++++++++++ .../version-v4/components/extension-api.md | 186 ++++++++ .../components/javascript-environment.md | 83 ++++ .../version-v4/components/overview.md | 170 +++++++ .../version-v4/components/plugin-api.md | 423 ++++++++++++++++++ .../version-v4-sidebars.json | 33 ++ v4-docs-migration-map.md | 8 +- 8 files changed, 1300 insertions(+), 4 deletions(-) create mode 100644 migration-context/link-placeholders/components-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/components/applications.md create mode 100644 reference_versioned_docs/version-v4/components/extension-api.md create mode 100644 reference_versioned_docs/version-v4/components/javascript-environment.md create mode 100644 
reference_versioned_docs/version-v4/components/overview.md create mode 100644 reference_versioned_docs/version-v4/components/plugin-api.md diff --git a/migration-context/link-placeholders/components-link-placeholders.md b/migration-context/link-placeholders/components-link-placeholders.md new file mode 100644 index 00000000..442454fe --- /dev/null +++ b/migration-context/link-placeholders/components-link-placeholders.md @@ -0,0 +1,65 @@ +# Link Placeholders for Components + +## reference_versioned_docs/version-v4/components/overview.md + +- Line (See Also section): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` + - Context: "See Also" section pointing to Resource API + - Target should be: `../resources/resource-api.md` + +- Line (See Also section): `[TODO:reference_versioned_docs/version-v4/database/schema.md]` + - Context: "See Also" section pointing to schema definition docs + - Target should be: `../database/schema.md` + +## reference_versioned_docs/version-v4/components/applications.md + +- Line (rest section): `[TODO:reference_versioned_docs/version-v4/rest/overview.md]` + - Context: Reference to REST interface documentation + - Target should be: `../rest/overview.md` + +- Line (graphqlSchema section): `[TODO:reference_versioned_docs/version-v4/database/schema.md]` + - Context: Reference to schema definition documentation + - Target should be: `../database/schema.md` + +- Line (jsResource section): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` + - Context: Reference to Resource API documentation + - Target should be: `../resources/resource-api.md` + +- Line (static section): `[TODO:reference_versioned_docs/version-v4/static-files/overview.md]` + - Context: Reference to static files documentation + - Target should be: `../static-files/overview.md` + +- Line (fastifyRoutes section): `[TODO:reference_versioned_docs/version-v4/fastify-routes/overview.md]` + - Context: Reference to Fastify routes documentation + - 
Target should be: `../fastify-routes/overview.md` + +- Line (graphql section): `[TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md]` + - Context: Reference to GraphQL querying documentation + - Target should be: `../graphql-querying/overview.md` + +- Line (loadEnv section): `[TODO:reference_versioned_docs/version-v4/environment-variables/overview.md]` + - Context: Reference to environment variables documentation + - Target should be: `../environment-variables/overview.md` + +- Line (roles section): `[TODO:reference_versioned_docs/version-v4/users-and-roles/configuration.md]` + - Context: Reference to users and roles configuration + - Target should be: `../users-and-roles/configuration.md` + +- Line (dataLoader section): `[TODO:reference_versioned_docs/version-v4/database/data-loader.md]` + - Context: Reference to data loader documentation + - Target should be: `../database/data-loader.md` + +## reference_versioned_docs/version-v4/components/extension-api.md + +- Line (Protocol Extension section): `[TODO:reference_versioned_docs/version-v4/http/api.md]` + - Context: Reference to the `server` global API for custom networking + - Target should be: `../http/api.md` + +## reference_versioned_docs/version-v4/components/plugin-api.md + +- Line (`scope.resources`): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` + - Context: Reference to Resource class in scope.resources + - Target should be: `../resources/resource-api.md` + +- Line (`scope.server`): `[TODO:reference_versioned_docs/version-v4/http/api.md]` + - Context: Reference to HTTP server global API + - Target should be: `../http/api.md` diff --git a/reference_versioned_docs/version-v4/components/applications.md b/reference_versioned_docs/version-v4/components/applications.md new file mode 100644 index 00000000..fb0185a4 --- /dev/null +++ b/reference_versioned_docs/version-v4/components/applications.md @@ -0,0 +1,336 @@ +--- +title: Applications +--- + + + + + + + +# 
Applications + +> The contents of this page primarily relate to **application** components. The term "components" in the Operations API and CLI generally refers to applications specifically. See the [Components Overview](./overview.md) for a full explanation of terminology. + +Harper offers several approaches to managing applications that differ between local development and remote Harper instances. + +## Local Development + +### `dev` and `run` Commands + +Added in: v4.2.0 + +The quickest way to run an application locally is with the `dev` command inside the application directory: + +```sh +harperdb dev . +``` + +The `dev` command watches for file changes and restarts Harper worker threads automatically. + +The `run` command is similar but does not watch for changes. Use `run` when the main thread needs to be restarted (the `dev` command does not restart the main thread). + +Stop either process with SIGINT (Ctrl+C). + +### Deploying to a Local Harper Instance + +To mimic interaction with a hosted Harper instance locally: + +1. Start Harper: `harperdb` +2. Deploy the application: + + ```sh + harperdb deploy \ + project= \ + package= \ + restart=true + ``` + + - Omit `target` to deploy to the locally running instance. + - Setting `package=` creates a symlink so file changes are picked up automatically between restarts. + - `restart=true` restarts worker threads after deploy. Use `restart=rolling` for a rolling restart. + +3. Use `harperdb restart` in another terminal to restart threads at any time. +4. Remove an application: `harperdb drop_component project=` + +> Not all [component operations](#operations-api) are available via CLI. When in doubt, use the Operations API via direct HTTP requests to the local Harper instance. + +Example: + +```sh +harperdb deploy \ + project=test-application \ + package=/Users/dev/test-application \ + restart=true +``` + +> Use `package=$(pwd)` if your current directory is the application directory. 
+ +## Remote Management + +Managing applications on a remote Harper instance uses the same operations as local management. The key difference is specifying a `target` along with credentials: + +```sh +harperdb deploy \ + project= \ + package= \ + username= \ + password= \ + target= \ + restart=true \ + replicated=true +``` + +Credentials can also be provided via environment variables: + +```sh +export CLI_TARGET_USERNAME= +export CLI_TARGET_PASSWORD= +harperdb deploy \ + project= \ + package= \ + target= \ + restart=true \ + replicated=true +``` + +### Package Sources + +When deploying remotely, the `package` field can be any valid npm dependency value: + +- **Omit** `package` to package and deploy the current local directory +- **npm package**: `package="@harperdb/status-check"` +- **GitHub**: `package="HarperDB/status-check"` or `package="https://github.com/HarperDB/status-check"` +- **Private repo (SSH)**: `package="git+ssh://git@github.com:HarperDB/secret-app.git"` +- **Tarball**: `package="https://example.com/application.tar.gz"` + +When using git tags, use the `semver` directive for reliable versioning: + +``` +HarperDB/application-template#semver:v1.0.0 +``` + +Harper generates a `package.json` from component configurations and uses a form of `npm install` to resolve them. This is why specifying a local file path creates a symlink (changes are picked up between restarts without redeploying). + +For SSH-based private repos, use the [Add SSH Key](#add-ssh-key) operation to register keys first. + +## Dependency Management + +Harper uses `npm` and `package.json` for dependency management. + +During application loading, Harper follows this resolution order to determine how to install dependencies: + +1. If `node_modules` exists, or if `package.json` is absent — skip installation +2. Check the application's `harperdb-config.yaml` for `install: { command, timeout }` fields +3. 
Derive the package manager from [`package.json#devEngines#packageManager`](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#devengines) +4. Default to `npm install` + +The `add_component` and `deploy_component` operations support `install_command` and `install_timeout` fields for customizing this behavior. + +### Example `harperdb-config.yaml` with Custom Install + +```yaml +myApp: + package: ./my-app + install: + command: yarn install + timeout: 600000 # 10 minutes +``` + +### Example `package.json` with `devEngines` + +```json +{ + "name": "my-app", + "version": "1.0.0", + "devEngines": { + "packageManager": { + "name": "pnpm", + "onFail": "error" + } + } +} +``` + +> If you plan to use an alternative package manager, ensure it is installed on the host machine. Harper does not support the `"onFail": "download"` option and falls back to `"onFail": "error"` behavior. + +## Advanced: Direct `harperdb-config.yaml` Configuration + +Applications can be added to Harper by adding them directly to `harperdb-config.yaml` (located in the Harper `rootPath`, typically `~/hdb`). + +```yaml +status-check: + package: '@harperdb/status-check' +``` + +The entry name does not need to match a `package.json` dependency. Harper transforms these entries into a `package.json` and runs `npm install`. + +Any valid npm dependency specifier works: + +```yaml +myGithubComponent: + package: HarperDB-Add-Ons/package#v2.2.0 +myNPMComponent: + package: harperdb +myTarBall: + package: /Users/harper/cool-component.tar +myLocal: + package: /Users/harper/local +myWebsite: + package: https://harperdb-component +``` + +Harper generates a `package.json` and installs all components into `` (default: `~/hdb/components`). A symlink back to `/node_modules` is created for dependency resolution. + +> Use `harperdb get_configuration` to find the `rootPath` and `componentsRoot` values on your instance. + +## Operations API + +Component operations are restricted to `super_user` roles. 
+ +### `add_component` + +Creates a new component project in the component root directory using a template. + +- `project` _(required)_ — Name of the project to create +- `template` _(optional)_ — Git URL of a template repository. Defaults to `https://github.com/HarperFast/application-template` +- `install_command` _(optional)_ — Install command. Defaults to `npm install` +- `install_timeout` _(optional)_ — Install timeout in milliseconds. Defaults to `300000` (5 minutes) +- `replicated` _(optional)_ — Replicate to all cluster nodes + +```json +{ + "operation": "add_component", + "project": "my-component" +} +``` + +### `deploy_component` + +Deploys a component using a package reference or a base64-encoded `.tar` payload. + +- `project` _(required)_ — Name of the project +- `package` _(optional)_ — Any valid npm reference (GitHub, npm, tarball, local path, URL) +- `payload` _(optional)_ — Base64-encoded `.tar` file content +- `force` _(optional)_ — Allow deploying over protected core components. Defaults to `false` +- `restart` _(optional)_ — `true` for immediate restart, `'rolling'` for sequential cluster restart +- `replicated` _(optional)_ — Replicate to all cluster nodes +- `install_command` _(optional)_ — Install command override +- `install_timeout` _(optional)_ — Install timeout override in milliseconds + +```json +{ + "operation": "deploy_component", + "project": "my-component", + "package": "HarperDB/application-template#semver:v1.0.0", + "replicated": true, + "restart": "rolling" +} +``` + +### `drop_component` + +Deletes a component project or a specific file within it. + +- `project` _(required)_ — Project name +- `file` _(optional)_ — Path relative to project folder. 
If omitted, deletes the entire project +- `replicated` _(optional)_ — Replicate deletion to all cluster nodes +- `restart` _(optional)_ — Restart Harper after dropping + +```json +{ + "operation": "drop_component", + "project": "my-component" +} +``` + +### `package_component` + +Packages a project folder as a base64-encoded `.tar` string. + +- `project` _(required)_ — Project name +- `skip_node_modules` _(optional)_ — Exclude `node_modules` from the package + +```json +{ + "operation": "package_component", + "project": "my-component", + "skip_node_modules": true +} +``` + +### `get_components` + +Returns all local component files, folders, and configuration from `harperdb-config.yaml`. + +```json +{ + "operation": "get_components" +} +``` + +### `get_component_file` + +Returns the contents of a file within a component project. + +- `project` _(required)_ — Project name +- `file` _(required)_ — Path relative to project folder +- `encoding` _(optional)_ — File encoding. Defaults to `utf8` + +```json +{ + "operation": "get_component_file", + "project": "my-component", + "file": "resources.js" +} +``` + +### `set_component_file` + +Creates or updates a file within a component project. + +- `project` _(required)_ — Project name +- `file` _(required)_ — Path relative to project folder +- `payload` _(required)_ — File content to write +- `encoding` _(optional)_ — File encoding. Defaults to `utf8` +- `replicated` _(optional)_ — Replicate update to all cluster nodes + +```json +{ + "operation": "set_component_file", + "project": "my-component", + "file": "test.js", + "payload": "console.log('hello world')" +} +``` + +### SSH Key Management + +For deploying from private repositories, SSH keys must be registered on the Harper instance. 
+ +#### `add_ssh_key` + +- `name` _(required)_ — Key name +- `key` _(required)_ — Private key contents (must be ed25519; use `\n` for line breaks with trailing `\n`) +- `host` _(required)_ — Host alias for SSH config (used in `package` URL) +- `hostname` _(required)_ — Actual domain (e.g., `github.com`) +- `known_hosts` _(optional)_ — Public SSH keys of the host. Auto-retrieved for `github.com` +- `replicated` _(optional)_ — Replicate to all cluster nodes + +```json +{ + "operation": "add_ssh_key", + "name": "my-key", + "key": "-----BEGIN OPENSSH PRIVATE KEY-----\n...\n-----END OPENSSH PRIVATE KEY-----\n", + "host": "my-key.github.com", + "hostname": "github.com" +} +``` + +After adding a key, use the configured host in deploy package URLs: + +``` +"package": "git+ssh://git@my-key.github.com:my-org/my-repo.git#semver:v1.0.0" +``` + +Additional SSH key operations: `update_ssh_key`, `delete_ssh_key`, `list_ssh_keys`, `set_ssh_known_hosts`, `get_ssh_known_hosts`. diff --git a/reference_versioned_docs/version-v4/components/extension-api.md b/reference_versioned_docs/version-v4/components/extension-api.md new file mode 100644 index 00000000..09fa8770 --- /dev/null +++ b/reference_versioned_docs/version-v4/components/extension-api.md @@ -0,0 +1,186 @@ +--- +title: Extension API +--- + + + + +# Extension API + +> As of Harper v4.6, a new iteration of the extension system called **Plugins** was released. Plugins simplify the API and are recommended for new extension development. See the [Plugin API](./plugin-api.md) reference. Both extensions and plugins are supported; extensions are not yet deprecated. + +Extensions are components that provide reusable building blocks for applications. 
There are two key types: + +- **Resource Extensions** — Handle specific files or directories +- **Protocol Extensions** — More advanced extensions that can return a Resource Extension; primarily used for implementing higher-level protocols and custom networking handlers + +An extension is distinguished from a plain component by implementing one or more of the Resource Extension or Protocol Extension API methods. + +## Declaring an Extension + +All extensions must define a `config.yaml` with an `extensionModule` option pointing to the extension source code (path resolves from the module root directory): + +```yaml +extensionModule: ./extension.js +``` + +If written in TypeScript or another compiled language, point to the built output: + +```yaml +extensionModule: ./dist/index.js +``` + +## Resource Extension + +A Resource Extension processes specific files or directories. It is comprised of four function exports: + +| Method | Thread | Timing | +| ------------------- | ------------------ | ------------------------- | +| `handleFile()` | All worker threads | Executed on every restart | +| `handleDirectory()` | All worker threads | Executed on every restart | +| `setupFile()` | Main thread only | Once, at initial start | +| `setupDirectory()` | Main thread only | Once, at initial start | + +> **Important**: `harperdb restart` only restarts worker threads. Code in `setupFile()` and `setupDirectory()` runs only when Harper fully shuts down and starts again—not on `deploy` or `restart`. + +`handleFile()` and `setupFile()` have identical signatures. `handleDirectory()` and `setupDirectory()` have identical signatures. + +### Resource Extension Configuration + +Resource Extensions can be configured with `files` and `urlPath` options in `config.yaml`: + +- `files` — `string | string[] | FilesOptionObject` _(required)_ — Glob pattern(s) determining which files and directories are resolved. Harper uses [fast-glob](https://github.com/mrmlnc/fast-glob) for matching. 
+ - `source` — `string | string[]` _(required when object form)_ — Glob pattern string(s) + - `only` — `'all' | 'files' | 'directories'` _(optional)_ — Restrict matching to a single entry type. Defaults to `'all'` + - `ignore` — `string[]` _(optional)_ — Patterns to exclude from matches + +- `urlPath` — `string` _(optional)_ — Base URL path prepended to resolved entries + - Starting with `./` (e.g., `'./static/'`) prepends the component name to the URL path + - Value of `.` uses the component name as the base path + - `..` is invalid and causes an error + - Leading/trailing slashes are handled automatically (`/static/`, `static/`, and `/static` are equivalent) + +Examples: + +```yaml +# Serve HTML files from web/ at the /static/ URL path +static: + files: 'web/*.html' + urlPath: 'static' + +# Load all GraphQL schemas from src/schema/ +graphqlSchema: + files: 'src/schema/*.graphql' + +# Match files in web/, excluding web/images/ +static: + files: + source: 'web/**/*' + ignore: ['web/images'] + +# Match only files (not directories) +myExtension: + files: + source: 'dir/**/*' + only: 'files' +``` + +### Resource Extension API + +At minimum, a Resource Extension must implement one of the four methods. As a standalone extension, export them directly: + +```js +// ESM +export function handleFile() {} +export function setupDirectory() {} + +// CJS +function handleDirectory() {} +function setupFile() {} +module.exports = { handleDirectory, setupFile }; +``` + +When returned by a Protocol Extension, define them on the returned object: + +```js +export function start() { + return { + handleFile() {}, + }; +} +``` + +#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise<void>` + +#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise<void>` + +Process individual files. Can be async. 
+ +Parameters: + +- `contents` — `Buffer` — File contents +- `urlPath` — `string` — Recommended URL path for the file +- `absolutePath` — `string` — Absolute filesystem path +- `resources` — `Object` — Currently loaded resources + +#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise<boolean | void>` + +#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise<boolean | void>` + +Process directories. Can be async. + +If the function returns a truthy value, the component loading sequence ends and no other entries in the directory are processed. + +Parameters: + +- `urlPath` — `string` — Recommended URL path for the directory +- `absolutePath` — `string` — Absolute filesystem path +- `resources` — `Object` — Currently loaded resources + +## Protocol Extension + +A Protocol Extension is a more advanced form of Resource Extension, primarily used for implementing higher-level protocols (e.g., building and running a Next.js project) or adding custom networking handlers. + +Protocol Extensions use the [`server`](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP server global API') global API for custom networking. + +### Protocol Extension Configuration + +In addition to the `files`, `urlPath`, and `package` options, Protocol Extensions accept any additional configuration options defined under the extension name in `config.yaml`. These options are passed through to the `options` object of `start()` and `startOnMainThread()`. + +Many protocol extensions accept `port` and `securePort` options for configuring networking handlers. 
+ +Example using `@harperdb/nextjs`: + +```yaml +'@harperdb/nextjs': + package: '@harperdb/nextjs' + files: './' + prebuilt: true + dev: false +``` + +### Protocol Extension API + +A Protocol Extension defines up to two methods: + +| Method | Thread | Timing | +| --------------------- | ------------------ | ------------------------- | +| `start()` | All worker threads | Executed on every restart | +| `startOnMainThread()` | Main thread only | Once, at initial start | + +Both methods receive the same `options` object and can return a Resource Extension (an object with any of the Resource Extension methods). + +#### `start(options): ResourceExtension | Promise<ResourceExtension>` + +#### `startOnMainThread(options): ResourceExtension | Promise<ResourceExtension>` + +Parameters: + +- `options` — `Object` — Extension configuration options from `config.yaml` + +Returns: An object implementing any of the Resource Extension methods + +## Version History + +- **v4.2.0** — Extension system introduced as part of the component architecture +- **v4.6.0** — New extension API with support for dynamic reloading; Plugin API introduced as the recommended alternative diff --git a/reference_versioned_docs/version-v4/components/javascript-environment.md new file mode 100644 index 00000000..cd6f75b1 --- /dev/null +++ b/reference_versioned_docs/version-v4/components/javascript-environment.md @@ -0,0 +1,83 @@ +--- +title: JavaScript Environment +--- + + + +# JavaScript Environment + +Harper executes component JavaScript inside Node.js VM contexts — isolated module environments that share the same Node.js runtime but have their own global scope. This means each component runs in its own module context while still being able to access Harper's global APIs without any imports. + +## Module Loading + +Harper supports both ESM and CommonJS module formats. + +All Harper globals are available directly as global variables in any component module. 
They are also accessible by importing from the `harperdb` package, which can provide better TypeScript typing: + +```javascript +import { tables, Resource } from 'harperdb'; +``` + +```javascript +const { tables, Resource } = require('harperdb'); +``` + +For components in their own directory, link the package to your local `harperdb` installation: + +```bash +npm link harperdb +``` + +All installed components have `harperdb` automatically linked. + +## Global APIs + +### `tables` + +An object whose properties are the tables in the default database (`data`). Each table defined in `schema.graphql` is accessible as a property and implements the Resource API. + +See [Database API](../database/api.md) for full reference. + +### `databases` + +An object containing all databases defined in Harper. Each database is an object of its tables — `databases.data` is always equivalent to `tables`. + +See [Database API](../database/api.md) for full reference. + +### `transaction(fn)` + +Executes a function inside a database transaction. Changes made within the function are committed atomically, or rolled back if an error is thrown. + +See [Transactions](../database/transaction.md) for full reference. + +### `createBlob(data, options?)` + +Added in: v4.5.0 + +Creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, etc.) in `Blob`-typed schema fields. + +See [Database API](../database/api.md) for full reference. + +### `Resource` + +The base class for all Harper resources, including tables and custom data sources. Extend `Resource` to implement custom data providers. + +See [Resource API](../resources/resource-api.md) for full reference. + +### `server` + +Provides access to Harper's HTTP server middleware chain, WebSocket server, authentication helpers, resource registry, and cluster information. 
Also exposes `server.contentTypes` as an alias for the `contentTypes` global. + +See [HTTP API](../http/api.md) for full reference. + +### `contentTypes` + +A `Map` of MIME type strings to content type handler objects. Harper uses this map for content negotiation — deserializing incoming request bodies and serializing outgoing responses. You can register custom handlers to support additional formats. + +See [HTTP API](../http/api.md) for full reference. + +### `logger` + +Provides structured logging methods (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `notify`) that write to Harper's log file. Available without any imports in all component code. + +See [Logging API](../logging/api.md) for full reference. diff --git a/reference_versioned_docs/version-v4/components/overview.md new file mode 100644 index 00000000..2dfe8d1d --- /dev/null +++ b/reference_versioned_docs/version-v4/components/overview.md @@ -0,0 +1,170 @@ +--- +title: Components +--- + + + + + + + +# Components + +**Components** are the high-level concept for modules that extend the Harper core platform with additional functionality. Components encapsulate both applications and extensions. + +> Harper is actively working to disambiguate component terminology. When you see "component" in the Operations API or CLI, it generally refers to an application. Documentation does its best to clarify which classification of component is meant wherever possible. + +## Concepts + +### Applications + +Added in: v4.2.0 + +**Applications** implement specific user-facing features or functionality. Applications are built on top of extensions and represent the end product that users interact with. For example, a Next.js application serving a web interface or an Apollo GraphQL server providing a GraphQL API are both applications. A collection of Harper Schemas and/or custom Resources is also an application. 
+ +### Extensions + +Added in: v4.2.0 + +**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality they implement. For example, the built-in `graphqlSchema` extension enables applications to define databases and tables using GraphQL schemas. The `@harperdb/nextjs` and `@harperdb/apollo` extensions provide building blocks for Next.js and Apollo applications respectively. + +Extensions can also depend on other extensions. For example, `@harperdb/apollo` depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. + +### Plugins (Experimental) + +Added in: v4.6.0 (experimental) + +**Plugins** are a new iteration of the extension system introduced in v4.6. They are simultaneously a simplification and extensibility upgrade over extensions. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only export a single `handleApplication` method. + +Plugins are **experimental**. In time extensions will be deprecated in favor of plugins, but both are currently supported. See the [Plugin API](./plugin-api.md) reference for complete documentation. + +### Built-In vs. Custom Components + +**Built-in** components are included with Harper by default and referenced directly by name. Examples include `graphqlSchema`, `rest`, `jsResource`, `static`, and `loadEnv`. + +**Custom** components use external references—npm packages, GitHub repositories, or local directories—and are typically included as `package.json` dependencies. + +Harper does not currently include built-in applications. All applications are custom. 
+ +## Architecture + +The relationship between applications, extensions, and Harper core: + +``` +Applications + ├── Next.js App → @harperdb/nextjs extension + ├── Apollo App → @harperdb/apollo extension + └── Custom Resource → jsResource + graphqlSchema + rest extensions + +Extensions + ├── Custom: @harperdb/nextjs, @harperdb/apollo, @harperdb/astro + └── Built-In: graphqlSchema, jsResource, rest, static, loadEnv, ... + +Core + └── database, file-system, networking +``` + +## Configuration + +Harper components are configured with a `config.yaml` file in the root of the component module directory. This file is how a component configures other components it depends on. Each entry starts with a component name, with configuration values indented below: + +```yaml +componentName: + option-1: value + option-2: value +``` + +### Default Configuration + +Components without a `config.yaml` get this default configuration automatically: + +```yaml +rest: true +graphqlSchema: + files: '*.graphql' +roles: + files: 'roles.yaml' +jsResource: + files: 'resources.js' +fastifyRoutes: + files: 'routes/*.js' + urlPath: '.' +static: + files: 'web/**' +``` + +If a `config.yaml` is provided, it **replaces** the default config entirely (no merging). + +### Custom Component Configuration + +Any custom component must be configured with a `package` option for Harper to load it. The component name must match a `package.json` dependency: + +```json +{ + "dependencies": { + "@harperdb/nextjs": "1.0.0" + } +} +``` + +```yaml +'@harperdb/nextjs': + package: '@harperdb/nextjs' + files: './' +``` + +The `package` value supports any valid npm dependency specifier: npm packages, GitHub repos, tarballs, local paths, and URLs. This is because Harper generates a `package.json` from component configurations and uses `npm install` to resolve them. + +### Extension and Plugin Configuration + +Extensions require an `extensionModule` option pointing to the extension source. 
Plugins require a `pluginModule` option. See [Extension API](./extension-api.md) and [Plugin API](./plugin-api.md) for details. + +## Built-In Extensions Reference + +| Name | Description | +| ------------------------------------------------- | ------------------------------------------------- | +| [`dataLoader`](../database/data-loader.md) | Load data from JSON/YAML files into Harper tables | +| [`fastifyRoutes`](../fastify-routes/overview.md) | Define custom endpoints with Fastify | +| [`graphql`](../graphql-querying/overview.md) | Enable GraphQL querying (experimental) | +| [`graphqlSchema`](../database/schema.md) | Define table schemas with GraphQL syntax | +| [`jsResource`](../resources/overview.md) | Define custom JavaScript-based resources | +| [`loadEnv`](../environment-variables/overview.md) | Load environment variables from `.env` files | +| [`rest`](../rest/overview.md) | Enable automatic REST endpoint generation | +| [`roles`](../users-and-roles/overview.md) | Define role-based access control from YAML files | +| [`static`](../static-files/overview.md) | Serve static files via HTTP | + +## Known Custom Components + +### Applications + +- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) +- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) +- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) + +### Extensions + +- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) +- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) +- [`@harperdb/astro`](https://github.com/HarperDB/astro) + +## Component Status Monitoring + +Added in: v4.7.0 + +Harper collects status from each component at load time and tracks any registered status change notifications. This provides visibility into the health and state of running components. 
+ +## Evolution History + +- **v4.1.0** — Custom functions with worker threads (predecessor to components) +- **v4.2.0** — Component architecture introduced; Resource API, REST interface, MQTT, WebSockets, SSE, configurable schemas +- **v4.3.0** — Component configuration improvements +- **v4.6.0** — New extension API with dynamic reloading; Plugin API introduced (experimental) +- **v4.7.0** — Component status monitoring; further plugin API improvements + +## See Also + +- [Applications](./applications.md) — Managing and deploying applications +- [Extension API](./extension-api.md) — Building custom extensions +- [Plugin API](./plugin-api.md) — Building plugins (experimental, recommended for new extensions) +- [TODO:reference_versioned_docs/version-v4/resources/resource-api.md](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference') — Resource class interface +- [TODO:reference_versioned_docs/version-v4/database/schema.md](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — Defining schemas with graphqlSchema diff --git a/reference_versioned_docs/version-v4/components/plugin-api.md b/reference_versioned_docs/version-v4/components/plugin-api.md new file mode 100644 index 00000000..e91e2ded --- /dev/null +++ b/reference_versioned_docs/version-v4/components/plugin-api.md @@ -0,0 +1,423 @@ +--- +title: Plugin API +--- + + + + + +# Plugin API + +Added in: v4.6.0 (experimental) + +> The Plugin API is **experimental**. It is the recommended approach for building new extensions, and is intended to replace the [Extension API](./extension-api.md) in the future. Both systems are supported simultaneously. + +The Plugin API is a new iteration of the extension system that simplifies the interface. Instead of defining multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, `handleDirectory`, `setupDirectory`), a plugin exports a single `handleApplication` method. 
+ +## Declaring a Plugin + +A plugin must specify a `pluginModule` option in `config.yaml` pointing to the plugin source: + +```yaml +pluginModule: plugin.js +``` + +For TypeScript or other compiled languages, point to the built output: + +```yaml +pluginModule: ./dist/index.js +``` + +It is recommended that plugins have a `package.json` with standard JavaScript package metadata (name, version, type, etc.). Plugins are standard JavaScript packages and can be published to npm, written in TypeScript, or export executables. + +## Configuration + +General plugin configuration options: + +- `files` — `string | string[] | FilesOptionObject` _(optional)_ — Glob pattern(s) for files and directories handled by the plugin's default `EntryHandler`. Pattern rules: + - Cannot contain `..` or start with `/` + - `.` or `./` is transformed to `**/*` automatically +- `urlPath` — `string` _(optional)_ — Base URL path prepended to resolved `files` entries. Cannot contain `..`. If starts with `./` or is `.`, the plugin name is automatically prepended +- `timeout` — `number` _(optional)_ — Timeout in milliseconds for plugin operations. Takes precedence over the plugin's `defaultTimeout` and the system default (30 seconds) + +### File Entries + +```yaml +# Serve files from web/ at /static/ +static: + files: 'web/**/*' + urlPath: '/static/' + +# Load only *.graphql files from src/schema/ +graphqlSchema: + files: 'src/schema/*.graphql' + +# Exclude a subdirectory +static: + files: + source: 'web/**/*' + ignore: 'web/images/**' +``` + +> Note: Unlike the Extension API, the Plugin API `files` object does **not** support an `only` field. Use `entryEvent.entryType` or `entryEvent.eventType` in your handler instead. + +### Timeouts + +The system default timeout is **30 seconds**. If `handleApplication()` does not complete within this time, the component loader throws an error to prevent indefinite hanging. 
+ +Plugins can override the system default by exporting a `defaultTimeout`: + +```typescript +export const defaultTimeout = 60_000; // 60 seconds +``` + +Users can override at the application level in `config.yaml`: + +```yaml +customPlugin: + package: '@org/custom-plugin' + files: 'foo.js' + timeout: 45_000 # 45 seconds +``` + +## TypeScript Support + +All classes and types are exported from the `harperdb` package: + +```typescript +import type { Scope, Config } from 'harperdb'; +``` + +## API Reference + +### Function: `handleApplication(scope: Scope): void | Promise` + +The only required export from a plugin module. The component loader executes it sequentially across all worker threads. It can be async and is awaited. + +Avoid event-loop-blocking operations within `handleApplication()`. + +```typescript +export function handleApplication(scope: Scope) { + // Use scope to access config, resources, server, etc. +} +``` + +Parameters: + +- `scope` — [`Scope`](#class-scope) — Access to the application's configuration, resources, and APIs + +The `handleApplication()` method cannot coexist with Extension API methods (`start`, `handleFile`, etc.). Defining both will throw an error. + +### Class: `Scope` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +The central object passed to `handleApplication()`. Provides access to configuration, file entries, server APIs, and logging. + +#### Events + +- **`'close'`** — Emitted after `scope.close()` is called +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — Emitted when the Scope is ready after loading the config file + +#### `scope.handleEntry([files][, handler])` + +Returns an [`EntryHandler`](#class-entryhandler) for watching and processing file system entries. 
+ +Overloads: + +- `scope.handleEntry()` — Returns the default `EntryHandler` based on `files`/`urlPath` in `config.yaml` +- `scope.handleEntry(handler)` — Returns default `EntryHandler`, registers `handler` for the `'all'` event +- `scope.handleEntry(files)` — Returns a new `EntryHandler` for custom `files` config +- `scope.handleEntry(files, handler)` — Returns a new `EntryHandler` with a custom `'all'` event handler + +Example: + +```js +export function handleApplication(scope) { + // Default handler with inline callback + scope.handleEntry((entry) => { + switch (entry.eventType) { + case 'add': + case 'change': + // handle file add/change + break; + case 'unlink': + // handle file deletion + break; + } + }); + + // Custom handler for specific files + const tsHandler = scope.handleEntry({ files: 'src/**/*.ts' }); +} +``` + +#### `scope.requestRestart()` + +Request a Harper restart. Does not restart immediately—indicates to the user that a restart is required. Called automatically if no `scope.options.on('change')` handler is defined or if a required handler is missing. + +#### `scope.resources` + +Returns: `Map` — Currently loaded [Resource](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API') instances. + +#### `scope.server` + +Returns: `server` — Reference to the [server](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP server global API') global API. Use for registering HTTP middleware, custom networking, etc. + +#### `scope.options` + +Returns: [`OptionsWatcher`](#class-optionswatcher) — Access to the application's configuration options. Emits `'change'` events when the plugin's section of the config file is modified. + +#### `scope.logger` + +Returns: `logger` — Scoped logger instance. Recommended over the global `logger`. + +#### `scope.name` + +Returns: `string` — The plugin name as configured in `config.yaml`. 
+ +#### `scope.directory` + +Returns: `string` — Root directory of the application component (where `config.yaml` lives). + +#### `scope.close()` + +Closes all associated entry handlers and the `scope.options` instance, emits `'close'`, and removes all listeners. + +### Class: `OptionsWatcher` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +Provides reactive access to plugin configuration options, scoped to the specific plugin within the application's `config.yaml`. + +#### Events + +- **`'change'`** — `key: string[], value: ConfigValue, config: ConfigValue` — Emitted when a config option changes + - `key` — Option key split into parts (e.g., `foo.bar` → `['foo', 'bar']`) + - `value` — New value + - `config` — Entire plugin configuration object + +- **`'close'`** — Emitted when the watcher is closed +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — `config: ConfigValue | undefined` — Emitted on initial load and after `'remove'` recovery +- **`'remove'`** — Config was removed (file deleted, config key deleted, or parse failure) + +Example: + +```typescript +export function handleApplication(scope) { + scope.options.on('change', (key, value, config) => { + if (key[0] === 'files') { + scope.logger.info(`Files option changed to: ${value}`); + } + }); +} +``` + +#### `options.get(key: string[]): ConfigValue | undefined` + +Get the value at a specific config key path. + +#### `options.getAll(): ConfigValue | undefined` + +Get the entire plugin configuration object. + +#### `options.getRoot(): Config | undefined` + +Get the root `config.yaml` object (all plugins and options). + +#### `options.close()` + +Close the watcher, preventing further events. + +### Class: `EntryHandler` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +Created by [`scope.handleEntry()`](#scopehandleentry). 
Watches file system entries matching a `files` glob pattern and emits events as files are added, changed, or removed. + +#### Events + +- **`'all'`** — `entry: FileEntryEvent | DirectoryEntryEvent` — Emitted for all entry events (add, change, unlink, addDir, unlinkDir). This is the event registered by the `scope.handleEntry(handler)` shorthand. +- **`'add'`** — `entry: AddFileEvent` — File created or first seen +- **`'addDir'`** — `entry: AddDirectoryEvent` — Directory created or first seen +- **`'change'`** — `entry: ChangeFileEvent` — File modified +- **`'close'`** — Entry handler closed +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — Handler ready and watching +- **`'unlink'`** — `entry: UnlinkFileEvent` — File deleted +- **`'unlinkDir'`** — `entry: UnlinkDirectoryEvent` — Directory deleted + +Recommended pattern for handling all events: + +```js +scope.handleEntry((entry) => { + switch (entry.eventType) { + case 'add': + break; + case 'change': + break; + case 'unlink': + break; + case 'addDir': + break; + case 'unlinkDir': + break; + } +}); +``` + +#### `entryHandler.name` + +Returns: `string` — Plugin name. + +#### `entryHandler.directory` + +Returns: `string` — Application root directory. + +#### `entryHandler.close()` + +Closes the entry handler, removing all listeners. Can be restarted with `update()`. + +#### `entryHandler.update(config: FilesOption | FileAndURLPathConfig)` + +Update the handler to watch new entries. Closes and recreates the underlying watcher while preserving existing listeners. Returns a Promise that resolves when the updated handler is ready. 
+ +### Interfaces + +#### `FilesOption` + +`string | string[] | FilesOptionObject` + +#### `FilesOptionObject` + +- `source` — `string | string[]` _(required)_ — Glob pattern(s) +- `ignore` — `string | string[]` _(optional)_ — Patterns to exclude + +#### `FileAndURLPathConfig` + +- `files` — `FilesOption` _(required)_ +- `urlPath` — `string` _(optional)_ + +#### `BaseEntry` + +- `stats` — `fs.Stats | undefined` — File system stats (may be absent depending on event, entry type, and platform) +- `urlPath` — `string` — URL path of the entry, resolved from `files` + `urlPath` options +- `absolutePath` — `string` — Absolute filesystem path + +#### `FileEntry` + +Extends `BaseEntry` + +- `contents` — `Buffer` — File contents (automatically read) + +#### `EntryEvent` + +Extends `BaseEntry` + +- `eventType` — `string` — Type of event +- `entryType` — `'file' | 'directory'` — Entry type + +#### `AddFileEvent` + +- `eventType: 'add'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `ChangeFileEvent` + +- `eventType: 'change'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `UnlinkFileEvent` + +- `eventType: 'unlink'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `FileEntryEvent` + +`AddFileEvent | ChangeFileEvent | UnlinkFileEvent` + +#### `AddDirectoryEvent` + +- `eventType: 'addDir'` +- `entryType: 'directory'` +- Extends `EntryEvent` + +#### `UnlinkDirectoryEvent` + +- `eventType: 'unlinkDir'` +- `entryType: 'directory'` +- Extends `EntryEvent` + +#### `DirectoryEntryEvent` + +`AddDirectoryEvent | UnlinkDirectoryEvent` + +#### `Config` + +`{ [key: string]: ConfigValue }` + +Parsed representation of `config.yaml`. + +#### `ConfigValue` + +`string | number | boolean | null | undefined | ConfigValue[] | Config` + +#### `onEntryEventHandler` + +`(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` + +Function signature for the `'all'` event handler passed to `scope.handleEntry()`. 
+ +## Example: Static File Server Plugin + +A simplified form of the built-in `static` extension demonstrating key Plugin API patterns: + +```js +export function handleApplication(scope) { + const staticFiles = new Map(); + + // React to config changes + scope.options.on('change', (key, value, config) => { + if (key[0] === 'files' || key[0] === 'urlPath') { + staticFiles.clear(); + scope.logger.info(`Static files reset due to change in ${key.join('.')}`); + } + }); + + // Handle file entry events + scope.handleEntry((entry) => { + if (entry.entryType === 'directory') return; + + switch (entry.eventType) { + case 'add': + case 'change': + staticFiles.set(entry.urlPath, entry.contents); + break; + case 'unlink': + staticFiles.delete(entry.urlPath); + break; + } + }); + + // Register HTTP middleware + scope.server.http( + (req, next) => { + if (req.method !== 'GET') return next(req); + + const file = staticFiles.get(req.pathname); + return file ? { statusCode: 200, body: file } : { statusCode: 404, body: 'File not found' }; + }, + { runFirst: true } + ); +} +``` + +## Version History + +- **v4.6.0** — Plugin API introduced (experimental) +- **v4.7.0** — Further improvements to the Plugin API diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 8bdf3811..8aca4f45 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -375,6 +375,39 @@ } ] }, + { + "type": "category", + "label": "Components", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "components/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "components/applications", + "label": "Applications" + }, + { + "type": "doc", + "id": "components/extension-api", + "label": "Extension API" + }, + { + "type": "doc", + "id": "components/plugin-api", + "label": "Plugin API" + }, + { + "type": "doc", + 
"id": "components/javascript-environment", + "label": "JavaScript Environment" + } + ] + }, { "type": "category", "label": "Legacy", diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index ef85907b..d0293de8 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -226,7 +226,7 @@ Broken out from the security section during migration — RBAC warrants its own - Components concept: v4.2.0 - Applications/Extensions: v4.3.0+ - Plugin API: v4.6.0 -- **Status**: Not Started +- **Status**: In Progress - **Notes**: This is a critical page that explains the evolution - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Custom functions with worker threads @@ -240,7 +240,7 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.7/developers/applications/*.md` - Current `reference/components/applications.md` - **Merge Required**: Yes - application developer docs scattered across multiple files -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture, NPM/GitHub deployment @@ -249,7 +249,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` - **Additional Sources**: Current `reference/components/extensions.md` - **Version Annotations**: Extension API formalized around v4.4-4.5 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API @@ -258,7 +258,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` - **Additional Sources**: Current `reference/components/plugins.md` - **Version Annotations**: Added in v4.6.0 -- **Status**: Not Started +- **Status**: In Progress - **Release Notes**: - 
[4.6.0](release-notes/v4-tucker/4.6.0.md) - Plugin API introduced - [4.7.0](release-notes/v4-tucker/4.7.0.md) - Further plugin API improvements From ef09307e382a49b743aefee3a4ec0caa23665033 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 09:34:26 -0600 Subject: [PATCH 28/51] Replication Section Migration (#461) * docs: migrate Replication section to v4 consolidated reference Adds overview.md, clustering.md, and sharding.md for the Replication section. Updates the v4 versioned sidebar and adds a link placeholder tracker. Co-Authored-By: Claude Sonnet 4.6 * docs(replication): remove all NATS/legacy clustering content Strips the Legacy NATS Clustering section from clustering.md, removes NATS-era callout blockquotes from overview.md, and drops version annotations that only make sense in a migration-history context. Co-Authored-By: Claude Sonnet 4.6 * manual review --------- Co-authored-by: Claude Sonnet 4.6 --- .../replication-link-placeholders.md | 11 + .../version-v4/replication/clustering.md | 318 ++++++++++++++++++ .../version-v4/replication/overview.md | 309 +++++++++++++++++ .../version-v4/replication/sharding.md | 209 ++++++++++++ .../version-v4-sidebars.json | 23 ++ 5 files changed, 870 insertions(+) create mode 100644 migration-context/link-placeholders/replication-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/replication/clustering.md create mode 100644 reference_versioned_docs/version-v4/replication/overview.md create mode 100644 reference_versioned_docs/version-v4/replication/sharding.md diff --git a/migration-context/link-placeholders/replication-link-placeholders.md b/migration-context/link-placeholders/replication-link-placeholders.md new file mode 100644 index 00000000..1208c8a6 --- /dev/null +++ b/migration-context/link-placeholders/replication-link-placeholders.md @@ -0,0 +1,11 @@ +# Link Placeholders for Replication + +## reference_versioned_docs/version-v4/replication/overview.md + +- Line ~84: 
`[Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md)` + - Context: Describing certificate revocation checking for replication connections — links to the cert verification config page for OCSP/CRL settings + - Target should be: `../security/certificate-verification.md` + +- Line ~183: `[Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md)` + - Context: "See Also" section footer link + - Target should be: `../security/certificate-management.md` diff --git a/reference_versioned_docs/version-v4/replication/clustering.md b/reference_versioned_docs/version-v4/replication/clustering.md new file mode 100644 index 00000000..98937eef --- /dev/null +++ b/reference_versioned_docs/version-v4/replication/clustering.md @@ -0,0 +1,318 @@ +--- +title: Clustering +--- + + + + +# Clustering + +Operations API for managing Harper's replication system. For an overview of how replication works, see [Replication Overview](./overview.md). For sharding configuration, see [Sharding](./sharding.md). + +All clustering operations require `super_user` role. + +--- + +### Add Node + +Adds a new Harper instance to the cluster. If `subscriptions` are provided, it creates the specified replication relationships between the nodes. Without `subscriptions`, a fully replicating system is created (all data in all databases). + +**Parameters**: + +- `operation` _(required)_ — must be `add_node` +- `hostname` or `url` _(required)_ — the hostname or URL of the node to add +- `verify_tls` _(optional)_ — whether to verify the TLS certificate. Set to `false` temporarily on fresh installs with self-signed certificates. Defaults to `true` +- `authorization` _(optional)_ — credentials for the node being added. Either an object with `username` and `password`, or an HTTP `Authorization` style string +- `retain_authorization` _(optional)_ — if `true`, stores credentials and uses them on every reconnect. 
Generally not recommended; prefer certificate-based authentication. Defaults to `false` +- `revoked_certificates` _(optional)_ — array of revoked certificate serial numbers that will not be accepted for any connections +- `shard` _(optional)_ — shard number for this node. Only needed when using sharding +- `start_time` _(optional)_ — ISO 8601 UTC datetime. If set, only data after this time is downloaded during initial synchronization instead of the entire database +- `subscriptions` _(optional)_ — explicit table-level replication relationships. This is optional (and discouraged). Each subscription is an object with: + - `database` — database name + - `table` — table name + - `subscribe` — if `true`, transactions on the remote table are replicated locally + - `publish` — if `true`, transactions on the local table are replicated to the remote node + +**Request**: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password" + } +} +``` + +**Response**: + +```json +{ + "message": "Successfully added 'server-two' to cluster" +} +``` + +> **Note**: `set_node` is an alias for `add_node`. + +--- + +### Update Node + +Modifies an existing Harper instance in the cluster. Will attempt to add the node if it does not exist. 
+ +**Parameters**: + +- `operation` _(required)_ — must be `update_node` +- `hostname` _(required)_ — hostname of the remote node to update +- `revoked_certificates` _(optional)_ — array of revoked certificate serial numbers +- `shard` _(optional)_ — shard number to assign to this node +- `subscriptions` _(required)_ — array of subscription objects (same structure as `add_node`) + +**Request**: + +```json +{ + "operation": "update_node", + "hostname": "server-two" +} +``` + +**Response**: + +```json +{ + "message": "Successfully updated 'server-two'" +} +``` + +--- + +### Remove Node + +Removes a Harper node from the cluster and stops all replication to and from that node. + +**Parameters**: + +- `operation` _(required)_ — must be `remove_node` +- `hostname` _(required)_ — hostname of the node to remove + +**Request**: + +```json +{ + "operation": "remove_node", + "hostname": "server-two" +} +``` + +**Response**: + +```json +{ + "message": "Successfully removed 'server-two' from cluster" +} +``` + +--- + +### Cluster Status + +Returns an array of status objects from the cluster, including active WebSocket connections and replication timing statistics. 
+ +Added in: v4.4.0; timing statistics added in v4.5.0 + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_status` + +**Request**: + +```json +{ + "operation": "cluster_status" +} +``` + +**Response**: + +```json +{ + "type": "cluster-status", + "connections": [ + { + "replicateByDefault": true, + "replicates": true, + "url": "wss://server-2.domain.com:9933", + "name": "server-2.domain.com", + "subscriptions": null, + "database_sockets": [ + { + "database": "data", + "connected": true, + "latency": 0.7, + "thread_id": 1, + "nodes": ["server-2.domain.com"], + "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", + "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", + "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", + "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" + } + ] + } + ], + "node_name": "server-1.domain.com", + "is_enabled": true +} +``` + +`database_sockets` shows the actual WebSocket connections between nodes — one socket per database per node. Timing fields: + +| Field | Description | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------- | +| `lastCommitConfirmed` | Last time a receipt of confirmation was received for an outgoing commit | +| `lastReceivedRemoteTime` | Timestamp (from the originating node) of the last received transaction | +| `lastReceivedLocalTime` | Local time when the last transaction was received. A gap between this and `lastReceivedRemoteTime` suggests the node is catching up | +| `sendingMessage` | Timestamp of the transaction actively being sent. Absent when waiting for the next transaction | + +--- + +### Configure Cluster + +Bulk creates or resets subscriptions for any number of remote nodes. 
**Resets and replaces any existing clustering setup.** + +**Parameters**: + +- `operation` _(required)_ — must be `configure_cluster` +- `connections` _(required)_ — array of node objects following the `add_node` schema + +**Request**: + +```json +{ + "operation": "configure_cluster", + "connections": [ + { + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password2" + } + }, + { + "hostname": "server-three", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password3" + } + } + ] +} +``` + +**Response**: + +```json +{ + "message": "Cluster successfully configured." +} +``` + +--- + +### Cluster Set Routes + +Adds routes to the `replication.routes` configuration. Behaves as a PATCH/upsert — adds new routes while leaving existing routes untouched. + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_set_routes` +- `routes` _(required)_ — array of route strings (`wss://host:port`) or objects with `hostname` and `port` properties + +**Request**: + +```json +{ + "operation": "cluster_set_routes", + "routes": [ + "wss://server-two:9925", + { + "hostname": "server-three", + "port": 9930 + } + ] +} +``` + +**Response**: + +```json +{ + "message": "cluster routes successfully set", + "set": ["wss://server-two:9925", { "hostname": "server-three", "port": 9930 }], + "skipped": [] +} +``` + +--- + +### Cluster Get Routes + +Returns the replication routes from the Harper config file. + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_get_routes` + +**Request**: + +```json +{ + "operation": "cluster_get_routes" +} +``` + +**Response**: + +```json +["wss://server-two:9925", { "hostname": "server-three", "port": 9930 }] +``` + +--- + +### Cluster Delete Routes + +Removes routes from the Harper config file. 
+ +**Parameters**: + +- `operation` _(required)_ — must be `cluster_delete_routes` +- `routes` _(required)_ — array of route objects to remove + +**Request**: + +```json +{ + "operation": "cluster_delete_routes", + "routes": [ + { + "hostname": "server-three", + "port": 9930 + } + ] +} +``` + +**Response**: + +```json +{ + "message": "cluster routes successfully deleted", + "deleted": [{ "hostname": "server-three", "port": 9930 }], + "skipped": [] +} +``` diff --git a/reference_versioned_docs/version-v4/replication/overview.md b/reference_versioned_docs/version-v4/replication/overview.md new file mode 100644 index 00000000..3a5012f7 --- /dev/null +++ b/reference_versioned_docs/version-v4/replication/overview.md @@ -0,0 +1,309 @@ +--- +title: Replication Overview +--- + + + + +# Replication Overview + +Harper's replication system is designed to make distributed data replication fast and reliable across multiple nodes. You can build a distributed database that ensures high availability, disaster recovery, and data localization — all without complex setup. Nodes can be added or removed dynamically, you can choose which data to replicate, and you can monitor cluster health without jumping through hoops. + +## Peer-to-Peer Model + +Harper replication uses a peer-to-peer model where every node in your cluster can send data to and receive data from other nodes. Nodes communicate over WebSockets, allowing data to flow in both directions. Harper automatically manages these connections and subscriptions, so you don't need to manually track data consistency. Connections between nodes are secured and reliable by default. + +## Configuration + +### Connecting Nodes + +To connect nodes to each other, provide hostnames or URLs in the `replication` section of `harperdb-config.yaml`. 
Each node specifies its own hostname and the routes (other nodes) it should connect to: + +```yaml +replication: + hostname: server-one + routes: + - server-two + - server-three +``` + +Routes can also be specified as URLs or with explicit port numbers: + +```yaml +replication: + hostname: server-one + routes: + - wss://server-two:9933 + - hostname: server-three + port: 9933 +``` + +By default, replication connects on the secure port `9933`. + +```yaml +replication: + securePort: 9933 +``` + +You can also manage nodes dynamically through the [Operations API](./clustering.md#operations-api) without editing the config file. + +### Gossip Discovery + +Harper automatically replicates node information to other nodes in the cluster using [gossip-style discovery](https://highscalability.com/gossip-protocol-explained/). This means you only need to connect to one existing node in a cluster, and Harper will automatically detect and connect to all other nodes bidirectionally. + +### Data Selection + +By default, Harper replicates all data in all databases. You can narrow replication to specific databases: + +```yaml +replication: + databases: + - data + - system +``` + +All tables within a replicated database are replicated by default. To exclude a specific table from replication, set `replicate: false` in the table definition: + +```graphql +type LocalTableForNode @table(replicate: false) { + id: ID! + name: String! +} +``` + +Transactions are replicated atomically, which may span multiple tables. You can also control how many nodes data is replicated to using [sharding configuration](./sharding.md). + +## Securing Connections + +Harper supports PKI-based security and authorization for replication connections. Two authentication methods are supported: + +- **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs). 
+- **IP-based authentication** (for development/testing): Nodes are identified by IP address when using insecure connections. + +Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to ensure revoked certificates cannot be used. OCSP and CRL work automatically with certificates from public CAs when `enableRootCAs` is enabled. For self-signed certificates or private CAs without OCSP/CRL support, use Harper's manual certificate revocation feature. Certificate verification settings follow the same configuration as HTTP mTLS connections (see [Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md 'HTTP mTLS certificate verification configuration')). + +### Providing Your Own Certificates + +If you have certificates from a public or corporate CA, enable `enableRootCAs` so nodes validate against the standard root CA list: + +```yaml +replication: + enableRootCAs: true +``` + +Ensure the certificate's CN matches the node's hostname. + +### Setting Up Custom Certificates + +There are two ways to configure Harper with your own certificates: + +1. Use the `add_certificate` operation to upload them. +2. Specify certificate paths directly in `harperdb-config.yaml`: + +```yaml +tls: + certificate: /path/to/certificate.pem + certificateAuthority: /path/to/ca.pem + privateKey: /path/to/privateKey.pem +``` + +Harper will load the provided certificates into the certificate table and use them to secure and authenticate connections. If you have a publicly-signed certificate, you can omit the `certificateAuthority` and enable `enableRootCAs` to use the bundled Mozilla CA store instead. + +### Cross-Generated Certificates + +Harper can generate its own certificates for secure connections — useful when no existing certificates are available. 
When you run `add_node` over SSL with temporary credentials, Harper automatically handles certificate generation and signing: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password" + } +} +``` + +On a fresh install, set `verify_tls: false` temporarily to accept the self-signed certificate. Harper then: + +1. Creates a certificate signing request (CSR) and sends it to `server-two`. +2. `server-two` signs the CSR and returns the signed certificate and CA. +3. The signed certificate is stored for all future connections. + +Credentials are not stored — they are discarded immediately after use. You can also provide credentials in HTTP Authorization format (Basic, Token, or JWT). + +### Revoking Certificates + +Added in: v4.5.0 + +Certificates used in replication can be revoked using the certificate serial number. Use either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config: + +Via the operations API: + +```json +{ + "operation": "update_node", + "hostname": "server-two", + "revoked_certificates": ["1769F7D6A"] +} +``` + +Via `harperdb-config.yaml`: + +```yaml +replication: + routes: + - hostname: server-three + port: 9930 + revokedCertificates: + - 1769F7D6A + - QA69C7E2S +``` + +### Insecure IP-Based Authentication + +For development, testing, or secure private networks, you can disable TLS and use IP addresses to authenticate nodes. Configure replication on an insecure port and set up IP-based routes: + +```yaml +replication: + port: 9933 + routes: + - 127.0.0.2 + - 127.0.0.3 +``` + +> **Warning**: Never use insecure connections for production systems accessible from the public internet. + +Loopback addresses (`127.0.0.X`) are a convenient way to run multiple nodes on a single machine for local development. 
+ +## Controlling Replication Flow + +By default, Harper replicates all data in all databases with symmetric bidirectional flow. To restrict replication to one direction between certain nodes, set `sends` and `receives` on the route configuration: + +```yaml +replication: + databases: + - data + routes: + - hostname: node-two + replicates: + sends: false + receives: true + - hostname: node-three + replicates: + sends: true + receives: false +``` + +In this example, the local node only receives from `node-two` (one-way inbound) and only sends to `node-three` (one-way outbound). + +> **Note**: When using controlled flow replication, avoid replicating the `system` database. The `system` database contains node configurations, so replicating it would cause all nodes to have identical (and incorrect) route configurations. + +### Explicit Subscriptions + +By default, Harper automatically manages connections and subscriptions between nodes. Explicit subscriptions exist only for testing, debugging, and legacy migration — they should not be used for production replication and will likely be removed in v5. + +With explicit subscriptions, Harper no longer guarantees data consistency. If you want unidirectional replication, use [controlled replication flow](#controlling-replication-flow) instead. 
+ +To explicitly subscribe, use `add_node` with subscription definitions: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "subscriptions": [ + { + "database": "dev", + "table": "my-table", + "publish": true, + "subscribe": false + } + ] +} +``` + +Update a subscription with `update_node`: + +```json +{ + "operation": "update_node", + "hostname": "server-two", + "subscriptions": [ + { + "database": "dev", + "table": "my-table", + "publish": true, + "subscribe": true + } + ] +} +``` + +## Monitoring Replication + +Added in: v4.5.0 (cluster status timing statistics) + +Use `cluster_status` to monitor the state of replication: + +```json +{ + "operation": "cluster_status" +} +``` + +See [Clustering Operations](./clustering.md#cluster-status) for the full response schema and field descriptions. + +## Initial Synchronization and Resynchronization + +When a new node is added and its database has not been previously synced, Harper downloads the full database from the first node it connects to. After the initial sync completes, the node enters replication mode and receives incremental updates. + +If a node goes offline and comes back, it resynchronizes automatically to catch up on missed transactions. + +You can also specify a `start_time` in the `add_node` operation to limit the initial download to data since a given point in time: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "start_time": "2024-01-01T00:00:00.000Z" +} +``` + +## Replicated Transactions + +The following data operations are replicated across the cluster: + +- Insert +- Update +- Upsert +- Delete +- Bulk loads (CSV data load, CSV file load, CSV URL load, import from S3) + +**Destructive schema operations are not replicated**: `drop_database`, `drop_table`, and `drop_attribute` must be run on each node independently. + +Users and roles are not replicated across the cluster. 
+ +Certain management operations — including component deployment and rolling restarts — can also be replicated across the cluster. + +## Inspecting Cluster Configuration + +Query the `hdb_nodes` system table to inspect the current known nodes and their configuration: + +```json +{ + "operation": "search_by_value", + "database": "system", + "table": "hdb_nodes", + "attribute": "name", + "value": "*" +} +``` + +The `hdb_certificate` table contains the certificates used for replication connections. + +## See Also + +- [Clustering Operations](./clustering.md) — Operations API for managing cluster nodes and subscriptions +- [Sharding](./sharding.md) — Distributing data across a subset of nodes +- [Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md 'Dynamic certificate management for replication') diff --git a/reference_versioned_docs/version-v4/replication/sharding.md b/reference_versioned_docs/version-v4/replication/sharding.md new file mode 100644 index 00000000..6625045a --- /dev/null +++ b/reference_versioned_docs/version-v4/replication/sharding.md @@ -0,0 +1,209 @@ +--- +title: Sharding +--- + + + + + +# Sharding + +Added in: v4.4.0 (provisional) + +Changed in: v4.5.0 — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. + +Harper's replication system supports sharding — storing different data across different subsets of nodes — while still allowing data to be accessed from any node in the cluster. This enables horizontal scalability for storage and write performance, while maintaining optimal data locality and consistency. + +When sharding is configured, requests for records that don't reside on the handling node are automatically forwarded to the appropriate node transparently. Clients do not need to know where data is stored. 
+ +By default (without sharding), Harper replicates all data to all nodes. + +## Approaches to Sharding + +There are two main approaches: + +**Dynamic sharding** — the location (residency) of records is determined dynamically based on where the record was written, the record's data, or a custom function. Records can be relocated dynamically based on where they are accessed. Residency information is specific to each record. + +**Static sharding** — each node is assigned to a specific numbered shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed. More predictable than dynamic sharding: data location is always determinable from the primary key. + +## Dynamic Sharding + +### Replication Count + +The simplest way to limit replication is to configure a replication count. Set `replicateTo` in the `replication` section of `harperdb-config.yaml` to specify how many additional nodes data should be replicated to: + +```yaml +replication: + replicateTo: 2 +``` + +This ensures each record is stored on three nodes total (the node that first stored it, plus two others). + +### Replication Control via REST Header + +With the REST interface, you can specify replication targets and confirmation requirements per request using the `X-Replicate-To` header: + +```http +PUT /MyTable/3 +X-Replicate-To: 2;confirm=1 +``` + +- `2` — replicate to two additional nodes +- `confirm=1` — wait for confirmation from one additional node before responding + +Specify exact destination nodes by hostname: + +```http +PUT /MyTable/3 +X-Replicate-To: node1,node2 +``` + +The `confirm` parameter can be combined with explicit node lists. 
+ +### Replication Control via Operations API + +Specify `replicateTo` and `replicatedConfirmation` in the operation body: + +```json +{ + "operation": "update", + "schema": "dev", + "table": "MyTable", + "hashValues": [3], + "record": { + "name": "John Doe" + }, + "replicateTo": 2, + "replicatedConfirmation": 1 +} +``` + +Or specify explicit nodes: + +```jsonc +{ + // ... + "replicateTo": ["node-1", "node-2"], + // ... +} +``` + +### Programmatic Replication Control + +Set `replicateTo` and `replicatedConfirmation` programmatically in a resource method: + +```javascript +class MyTable extends tables.MyTable { + put(record) { + const context = this.getContext(); + context.replicateTo = 2; // or an array of node names + context.replicatedConfirmation = 1; + return super.put(record); + } +} +``` + +## Static Sharding + +### Basic Static Shard Configuration + +Assign a node to a numbered shard in `harperdb-config.yaml`: + +```yaml +replication: + shard: 1 +``` + +Or assign shards per route: + +```yaml +replication: + routes: + - hostname: node1 + shard: 1 + - hostname: node2 + shard: 2 +``` + +Or dynamically via the operations API by including `shard` in an `add_node` or `set_node` operation: + +```json +{ + "operation": "add_node", + "hostname": "node1", + "shard": 1 +} +``` + +Once shards are configured, use `setResidency` or `setResidencyById` (described below) to assign records to specific shards. + +## Custom Sharding + +### By Record Content (`setResidency`) + +Define a custom residency function that is called with the full record. Return an array of node hostnames or a shard number. + +With this approach, record metadata (including residency information) and indexed properties are replicated to all nodes, but the full record is only stored on the specified nodes. + +Return node hostnames: + +```javascript +MyTable.setResidency((record) => { + return record.id % 2 === 0 ? 
['node1'] : ['node2']; +}); +``` + +Return a shard number (replicates to all nodes in that shard): + +```javascript +MyTable.setResidency((record) => { + return record.id % 2 === 0 ? 1 : 2; +}); +``` + +### By Primary Key Only (`setResidencyById`) + +Define a residency function based solely on the primary key. Records (including their metadata) are replicated only to the specified nodes. Because residency can be computed from the primary key alone, a requesting node can locate and retrieve a record without holding that record's data or metadata locally. + +Return a shard number: + +```javascript +MyTable.setResidencyById((id) => { + return id % 2 === 0 ? 1 : 2; +}); +``` + +Return node hostnames: + +```javascript +MyTable.setResidencyById((id) => { + return id % 2 === 0 ? ['node1'] : ['node2']; +}); +``` + +## Disabling Cross-Node Access + +By default, sharding allows data stored on specific nodes to be accessed from any node — requests are forwarded transparently. To disable this and only return data if it is stored on the local node, set `replicateFrom` to `false`. 
+ +Via the operations API: + +```json +{ + "operation": "search_by_id", + "table": "MyTable", + "ids": [3], + "replicateFrom": false +} +``` + +Via the REST API: + +```http +GET /MyTable/3 +X-Replicate-From: none +``` + +## See Also + +- [Replication Overview](./overview.md) — How Harper's replication system works +- [Clustering Operations](./clustering.md) — Operations API for managing cluster nodes diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 8aca4f45..7513afcc 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -408,6 +408,29 @@ } ] }, + { + "type": "category", + "label": "Replication", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "replication/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "replication/clustering", + "label": "Clustering" + }, + { + "type": "doc", + "id": "replication/sharding", + "label": "Sharding" + } + ] + }, { "type": "category", "label": "Legacy", From 4f7fc1e03eb6dd99cff69c28fc4f8117afac67c4 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 09:53:48 -0600 Subject: [PATCH 29/51] Operations API Migration (#462) * docs: migrate Operations API section to v4 consolidated reference Adds overview and operations reference for the Operations API section to the v4 consolidated reference structure. 
Co-Authored-By: Claude Sonnet 4.6 * manual review --------- Co-authored-by: Claude Sonnet 4.6 --- .../operations-api-link-placeholders.md | 131 +++ .../version-v4/operations-api/operations.md | 990 ++++++++++++++++++ .../version-v4/operations-api/overview.md | 87 ++ .../version-v4-sidebars.json | 18 + 4 files changed, 1226 insertions(+) create mode 100644 migration-context/link-placeholders/operations-api-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/operations-api/operations.md create mode 100644 reference_versioned_docs/version-v4/operations-api/overview.md diff --git a/migration-context/link-placeholders/operations-api-link-placeholders.md b/migration-context/link-placeholders/operations-api-link-placeholders.md new file mode 100644 index 00000000..fb679c8b --- /dev/null +++ b/migration-context/link-placeholders/operations-api-link-placeholders.md @@ -0,0 +1,131 @@ +# Link Placeholders for Operations API + +## reference_versioned_docs/version-v4/operations-api/overview.md + +- Line 18: `[TODO:reference_versioned_docs/version-v4/configuration/overview.md]` + - Context: Describing how to change the Operations API port via configuration + - Target should be: Configuration overview page (operationsApi.network section) + +- Line 24: `[Basic Authentication](TODO:reference_versioned_docs/version-v4/security/basic-authentication.md)` + - Context: Listing supported authentication methods for Operations API + - Target should be: Basic authentication reference page + +- Line 25: `[JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md)` + - Context: Listing supported authentication methods for Operations API + - Target should be: JWT authentication reference page + +- Line 41: `[Operations](./operations.md)` — **Internal link; already resolved** + +- Table row: `[Databases & Tables](./operations.md#databases--tables)` — **Internal link; already resolved** + +- Table row: `[NoSQL 
Operations](./operations.md#nosql-operations)` — **Internal link; already resolved** + +- Table row: `[Bulk Operations](./operations.md#bulk-operations)` — **Internal link; already resolved** + +- Table row: `[SQL Operations](./operations.md#sql-operations)` — **Internal link; already resolved** + +- Table row: `[Users & Roles](./operations.md#users--roles)` — **Internal link; already resolved** + +- Table row: `[Token Authentication](./operations.md#token-authentication)` — **Internal link; already resolved** + +- Table row: `[Components](./operations.md#components)` — **Internal link; already resolved** + +- Table row: `[Replication & Clustering](./operations.md#replication--clustering)` — **Internal link; already resolved** + +- Table row: `[Configuration](./operations.md#configuration)` — **Internal link; already resolved** + +- Table row: `[Jobs](./operations.md#jobs)` — **Internal link; already resolved** + +- Table row: `[Logs](./operations.md#logs)` — **Internal link; already resolved** + +- Table row: `[Certificate Management](./operations.md#certificate-management)` — **Internal link; already resolved** + +- Table row: `[Analytics](./operations.md#analytics)` — **Internal link; already resolved** + +- Table row: `[Registration & Licensing](./operations.md#registration--licensing)` — **Internal link; already resolved** + +--- + +## reference_versioned_docs/version-v4/operations-api/operations.md + +### Databases & Tables section + +- Line (Databases & Tables description): `[TODO:reference_versioned_docs/version-v4/database/overview.md]` + - Context: "Detailed documentation" link at top of Databases & Tables section + - Target should be: Database overview page + +### NoSQL Operations section + +- Line (NoSQL Operations description): `[TODO:reference_versioned_docs/version-v4/rest/querying.md]` + - Context: "Detailed documentation" link at top of NoSQL Operations section + - Target should be: REST querying reference (also covers NoSQL query patterns) + +### 
Bulk Operations section + +- Line (Bulk Operations description): `[TODO:reference_versioned_docs/version-v4/database/jobs.md]` + - Context: "Detailed documentation" link at top of Bulk Operations section + - Target should be: Database jobs reference page + +### SQL Operations section + +- Line (SQL Operations description): `[TODO:reference_versioned_docs/version-v4/legacy/sql]` + - Context: "Detailed documentation" link at top of SQL Operations section + - Target should be: Legacy SQL guide section + +### Users & Roles section + +- Two instances: `[TODO:reference_versioned_docs/version-v4/users-and-roles/operations.md]` + - Context: "Detailed documentation" link and inline "See" link in Users & Roles section + - Target should be: Users & Roles operations page + +### Token Authentication section + +- Line (Token Authentication description): `[TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md]` + - Context: "Detailed documentation" link at top of Token Authentication section + - Target should be: JWT authentication reference page + +### Components section + +- Line (Components description): `[TODO:reference_versioned_docs/version-v4/components/overview.md]` + - Context: "Detailed documentation" link at top of Components section + - Target should be: Components overview page + +- Line (Deprecated Custom Functions): `[TODO:reference_versioned_docs/version-v4/components/overview.md]` + - Context: Referring readers from deprecated custom-functions ops to modern equivalent + - Target should be: Components overview page + +### Replication & Clustering section + +- Line (Replication description): `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` + - Context: "Detailed documentation" link at top of Replication & Clustering section + - Target should be: Replication clustering reference page + +### Configuration section + +- Two instances: `[TODO:reference_versioned_docs/version-v4/configuration/overview.md]` + - Context: "Detailed 
documentation" link at top of Configuration section and inline reference + - Target should be: Configuration overview page + +### Jobs section + +- Two instances: `[TODO:reference_versioned_docs/version-v4/database/jobs.md]` + - Context: "Detailed documentation" link at top of Jobs section and inline reference + - Target should be: Database jobs reference page + +### Logs section + +- Line (Logs description): `[TODO:reference_versioned_docs/version-v4/logging/operations.md]` + - Context: "Detailed documentation" link at top of Logs section + - Target should be: Logging operations page + +### Certificate Management section + +- Line (Certificate Management description): `[TODO:reference_versioned_docs/version-v4/security/certificate-management.md]` + - Context: "Detailed documentation" link at top of Certificate Management section + - Target should be: Security certificate management page + +### Analytics section + +- Line (Analytics description): `[TODO:reference_versioned_docs/version-v4/analytics/operations.md]` + - Context: "Detailed documentation" link at top of Analytics section + - Target should be: Analytics operations page diff --git a/reference_versioned_docs/version-v4/operations-api/operations.md b/reference_versioned_docs/version-v4/operations-api/operations.md new file mode 100644 index 00000000..d6219815 --- /dev/null +++ b/reference_versioned_docs/version-v4/operations-api/operations.md @@ -0,0 +1,990 @@ +--- +title: Operations Reference +--- + + + + + + + + + + + + + + + + + + +# Operations Reference + +This page lists all available Operations API operations, grouped by category. Each entry links to the feature section where the full documentation lives. + +For endpoint and authentication setup, see the [Operations API Overview](./overview.md). + +--- + +## Databases & Tables + +Operations for managing databases, tables, and attributes. 
+ +Detailed documentation: [Database Overview](../database/overview.md) + +| Operation | Description | Role Required | +| ------------------- | ------------------------------------------------------------------- | ------------- | +| `describe_all` | Returns definitions of all databases and tables, with record counts | any | +| `describe_database` | Returns all table definitions for a specified database | any | +| `describe_table` | Returns the definition of a specified table | any | +| `create_database` | Creates a new database | super_user | +| `drop_database` | Drops a database and all its tables/records | super_user | +| `create_table` | Creates a new table with optional schema and expiration | super_user | +| `drop_table` | Drops a table and all its records | super_user | +| `create_attribute` | Adds a new attribute to a table | super_user | +| `drop_attribute` | Removes an attribute and all its values from a table | super_user | +| `get_backup` | Returns a binary snapshot of a database for backup purposes | super_user | + +### `describe_all` + +Returns the definitions of all databases and tables within the database. Record counts above 5000 records are estimated; the response includes `estimated_record_range` when estimated. To force an exact count (requires full table scan), include `"exact_count": true`. + +```json +{ "operation": "describe_all" } +``` + +### `describe_database` + +Returns all table definitions within the specified database. + +```json +{ "operation": "describe_database", "database": "dev" } +``` + +### `describe_table` + +Returns the definition of a specific table. + +```json +{ "operation": "describe_table", "table": "dog", "database": "dev" } +``` + +### `create_database` + +Creates a new database. + +```json +{ "operation": "create_database", "database": "dev" } +``` + +### `drop_database` + +Drops a database and all its tables/records. Supports `"replicated": true` to propagate to all cluster nodes. 
+ +```json +{ "operation": "drop_database", "database": "dev" } +``` + +### `create_table` + +Creates a new table. Optional fields: `database` (defaults to `data`), `attributes` (array defining schema), `expiration` (TTL in seconds). + +```json +{ + "operation": "create_table", + "database": "dev", + "table": "dog", + "primary_key": "id" +} +``` + +### `drop_table` + +Drops a table and all associated records. Supports `"replicated": true`. + +```json +{ "operation": "drop_table", "database": "dev", "table": "dog" } +``` + +### `create_attribute` + +Creates a new attribute within a table. Harper auto-creates attributes on insert/update, but this can be used to pre-define them (e.g., for role-based permission setup). + +```json +{ + "operation": "create_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### `drop_attribute` + +Drops an attribute and all its values from the specified table. + +```json +{ + "operation": "drop_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### `get_backup` + +Returns a binary snapshot of the specified database (or individual table). Safe for backup while Harper is running. Specify `"table"` for a single table or `"tables"` for a set. + +```json +{ "operation": "get_backup", "database": "dev" } +``` + +--- + +## NoSQL Operations + +Operations for inserting, updating, deleting, and querying records using NoSQL. 
+ +Detailed documentation: [REST Querying Reference](../rest/querying.md) + +| Operation | Description | Role Required | +| ---------------------- | ------------------------------------------------------------------------- | ------------- | +| `insert` | Inserts one or more records | any | +| `update` | Updates one or more records by primary key | any | +| `upsert` | Inserts or updates records | any | +| `delete` | Deletes records by primary key | any | +| `search_by_id` | Retrieves records by primary key | any | +| `search_by_value` | Retrieves records matching a value on any attribute | any | +| `search_by_conditions` | Retrieves records matching complex conditions with sorting and pagination | any | + +### `insert` + +Inserts one or more records. If a primary key is not provided, a GUID or auto-increment value is generated. + +```json +{ + "operation": "insert", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "dog_name": "Penny" }] +} +``` + +### `update` + +Updates one or more records. Primary key must be supplied for each record. + +```json +{ + "operation": "update", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "weight_lbs": 38 }] +} +``` + +### `upsert` + +Updates existing records and inserts new ones. Matches on primary key if provided. + +```json +{ + "operation": "upsert", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "weight_lbs": 40 }] +} +``` + +### `delete` + +Deletes records by primary key values. + +```json +{ + "operation": "delete", + "database": "dev", + "table": "dog", + "ids": [1, 2] +} +``` + +### `search_by_id` + +Returns records matching the given primary key values. Use `"get_attributes": ["*"]` to return all attributes. + +```json +{ + "operation": "search_by_id", + "database": "dev", + "table": "dog", + "ids": [1, 2], + "get_attributes": ["dog_name", "breed_id"] +} +``` + +### `search_by_value` + +Returns records with a matching value on any attribute. 
Supports wildcards (e.g., `"Ky*"`). + +```json +{ + "operation": "search_by_value", + "database": "dev", + "table": "dog", + "attribute": "owner_name", + "value": "Ky*", + "get_attributes": ["id", "dog_name"] +} +``` + +### `search_by_conditions` + +Returns records matching one or more conditions. Supports `operator` (`and`/`or`), `offset`, `limit`, nested `conditions` groups, and `sort` with multi-level tie-breaking. + +```json +{ + "operation": "search_by_conditions", + "database": "dev", + "table": "dog", + "operator": "and", + "limit": 10, + "get_attributes": ["*"], + "conditions": [{ "attribute": "age", "comparator": "between", "value": [5, 8] }] +} +``` + +--- + +## Bulk Operations + +Operations for bulk import/export of data. + +Detailed documentation: [Database Jobs](../database/jobs.md) + +| Operation | Description | Role Required | +| ----------------------- | -------------------------------------------------------------- | ------------- | +| `export_local` | Exports query results to a local file in JSON or CSV | super_user | +| `csv_data_load` | Ingests CSV data provided inline | any | +| `csv_file_load` | Ingests CSV data from a server-local file path | any | +| `csv_url_load` | Ingests CSV data from a URL | any | +| `export_to_s3` | Exports query results to AWS S3 | super_user | +| `import_from_s3` | Imports CSV or JSON data from AWS S3 | any | +| `delete_records_before` | Deletes records older than a given timestamp (local node only) | super_user | + +All bulk import/export operations are asynchronous and return a job ID. Use [`get_job`](#get_job) to check status. + +### `export_local` + +Exports query results to a local path on the server. Formats: `json` or `csv`. + +```json +{ + "operation": "export_local", + "format": "json", + "path": "/data/", + "search_operation": { "operation": "sql", "sql": "SELECT * FROM dev.dog" } +} +``` + +### `csv_data_load` + +Ingests inline CSV data. Actions: `insert` (default), `update`, `upsert`. 
+ +```json +{ + "operation": "csv_data_load", + "database": "dev", + "table": "dog", + "action": "insert", + "data": "id,name\n1,Penny\n" +} +``` + +### `csv_file_load` + +Ingests CSV from a file path on the server running Harper. + +```json +{ + "operation": "csv_file_load", + "database": "dev", + "table": "dog", + "file_path": "/home/user/imports/dogs.csv" +} +``` + +### `csv_url_load` + +Ingests CSV from a URL. + +```json +{ + "operation": "csv_url_load", + "database": "dev", + "table": "dog", + "csv_url": "https://example.com/dogs.csv" +} +``` + +### `export_to_s3` + +Exports query results to an AWS S3 bucket as JSON or CSV. + +```json +{ + "operation": "export_to_s3", + "format": "json", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET", + "bucket": "my-bucket", + "key": "dogs.json", + "region": "us-east-1" + }, + "search_operation": { "operation": "sql", "sql": "SELECT * FROM dev.dog" } +} +``` + +### `import_from_s3` + +Imports CSV or JSON from an AWS S3 bucket. File must include a valid `.csv` or `.json` extension. + +```json +{ + "operation": "import_from_s3", + "database": "dev", + "table": "dog", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET", + "bucket": "my-bucket", + "key": "dogs.csv", + "region": "us-east-1" + } +} +``` + +### `delete_records_before` + +Deletes records older than the specified timestamp from the local node only. Clustered nodes retain their data. + +```json +{ + "operation": "delete_records_before", + "date": "2021-01-25T23:05:27.464", + "schema": "dev", + "table": "dog" +} +``` + +--- + +## SQL Operations + +Operations for executing SQL statements. + +:::warning +Harper SQL is intended for data investigation and use cases where performance is not a priority. For production workloads, use NoSQL or REST operations. SQL performance optimizations are on the roadmap. 
+::: + +Detailed documentation: [TODO:reference_versioned_docs/version-v4/legacy/sql 'Legacy SQL reference'] + +| Operation | Description | Role Required | +| --------- | ------------------------------------------------------------------ | ------------- | +| `sql` | Executes a SQL `SELECT`, `INSERT`, `UPDATE`, or `DELETE` statement | any | + +### `sql` + +Executes a standard SQL statement. + +```json +{ "operation": "sql", "sql": "SELECT * FROM dev.dog WHERE id = 1" } +``` + +--- + +## Users & Roles + +Operations for managing users and role-based access control (RBAC). + +Detailed documentation: [Users & Roles Operations](../users-and-roles/operations.md) + +| Operation | Description | Role Required | +| ------------ | --------------------------------------------------- | ------------- | +| `list_roles` | Returns all roles | super_user | +| `add_role` | Creates a new role with permissions | super_user | +| `alter_role` | Modifies an existing role's permissions | super_user | +| `drop_role` | Deletes a role (role must have no associated users) | super_user | +| `list_users` | Returns all users | super_user | +| `user_info` | Returns data for the authenticated user | any | +| `add_user` | Creates a new user | super_user | +| `alter_user` | Modifies an existing user's credentials or role | super_user | +| `drop_user` | Deletes a user | super_user | + +### `list_roles` + +Returns all roles defined in the instance. + +```json +{ "operation": "list_roles" } +``` + +### `add_role` + +Creates a new role with the specified permissions. The `permission` object maps database names to table-level access rules (`read`, `insert`, `update`, `delete`). Set `super_user: true` to grant full access. 
+ +```json +{ + "operation": "add_role", + "role": "developer", + "permission": { + "super_user": false, + "dev": { + "tables": { + "dog": { "read": true, "insert": true, "update": true, "delete": false } + } + } + } +} +``` + +### `alter_role` + +Modifies an existing role's name or permissions. Requires the role's `id` (returned by `list_roles`). + +```json +{ + "operation": "alter_role", + "id": "f92162e2-cd17-450c-aae0-372a76859038", + "role": "senior_developer", + "permission": { + "super_user": false, + "dev": { + "tables": { + "dog": { "read": true, "insert": true, "update": true, "delete": true } + } + } + } +} +``` + +### `drop_role` + +Deletes a role. The role must have no associated users before it can be dropped. + +```json +{ "operation": "drop_role", "id": "f92162e2-cd17-450c-aae0-372a76859038" } +``` + +### `list_users` + +Returns all users. + +```json +{ "operation": "list_users" } +``` + +### `user_info` + +Returns data for the currently authenticated user. + +```json +{ "operation": "user_info" } +``` + +### `add_user` + +Creates a new user. `username` cannot be changed after creation. `password` is stored encrypted. + +```json +{ + "operation": "add_user", + "role": "developer", + "username": "hdb_user", + "password": "password", + "active": true +} +``` + +### `alter_user` + +Modifies an existing user's password, role, or active status. All fields except `username` are optional. + +```json +{ + "operation": "alter_user", + "username": "hdb_user", + "password": "new_password", + "role": "senior_developer", + "active": true +} +``` + +### `drop_user` + +Deletes a user by username. + +```json +{ "operation": "drop_user", "username": "hdb_user" } +``` + +See [Users & Roles Operations](../users-and-roles/operations.md) for full documentation including permission object structure. + +--- + +## Token Authentication + +Operations for JWT token creation and refresh. 
+ +Detailed documentation: [JWT Authentication](../security/jwt-authentication.md) + +| Operation | Description | Role Required | +| ------------------------------ | ------------------------------------------------------- | ---------------------- | +| `create_authentication_tokens` | Creates an operation token and refresh token for a user | none (unauthenticated) | +| `refresh_operation_token` | Creates a new operation token from a refresh token | any | + +### `create_authentication_tokens` + +Does not require prior authentication. Returns `operation_token` (short-lived JWT) and `refresh_token` (long-lived JWT). + +```json +{ + "operation": "create_authentication_tokens", + "username": "my-user", + "password": "my-password" +} +``` + +### `refresh_operation_token` + +Creates a new operation token from an existing refresh token. + +```json +{ + "operation": "refresh_operation_token", + "refresh_token": "EXISTING_REFRESH_TOKEN" +} +``` + +--- + +## Components + +Operations for deploying and managing Harper components (applications, extensions, plugins). 
+ +Detailed documentation: [Components Overview](../components/overview.md) + +| Operation | Description | Role Required | +| ---------------------- | ----------------------------------------------------------------------- | ------------- | +| `add_component` | Creates a new component project from a template | super_user | +| `deploy_component` | Deploys a component via payload (tar) or package reference (NPM/GitHub) | super_user | +| `package_component` | Packages a component project into a base64-encoded tar | super_user | +| `drop_component` | Deletes a component or a file within a component | super_user | +| `get_components` | Lists all component files and config | super_user | +| `get_component_file` | Returns the contents of a file within a component | super_user | +| `set_component_file` | Creates or updates a file within a component | super_user | +| `add_ssh_key` | Adds an SSH key for deploying from private repositories | super_user | +| `update_ssh_key` | Updates an existing SSH key | super_user | +| `delete_ssh_key` | Deletes an SSH key | super_user | +| `list_ssh_keys` | Lists all configured SSH key names | super_user | +| `set_ssh_known_hosts` | Overwrites the SSH known_hosts file | super_user | +| `get_ssh_known_hosts` | Returns the contents of the SSH known_hosts file | super_user | +| `install_node_modules` | _(Deprecated)_ Run npm install on component projects | super_user | + +### `deploy_component` + +Deploys a component. The `package` option accepts any valid NPM reference including GitHub repos (`HarperDB/app#semver:v1.0.0`), tarballs, or NPM packages. The `payload` option accepts a base64-encoded tar string from `package_component`. Supports `"replicated": true` and `"restart": true` or `"restart": "rolling"`. 
+ +```json +{ + "operation": "deploy_component", + "project": "my-app", + "package": "my-org/my-app#semver:v1.2.3", + "replicated": true, + "restart": "rolling" +} +``` + +### `add_ssh_key` + +Adds an SSH key (must be ed25519) for authenticating deployments from private repositories. + +```json +{ + "operation": "add_ssh_key", + "name": "my-key", + "key": "-----BEGIN OPENSSH PRIVATE KEY-----\n...\n-----END OPENSSH PRIVATE KEY-----\n", + "host": "my-key.github.com", + "hostname": "github.com" +} +``` + +--- + +## Replication & Clustering + +Operations for configuring and managing Harper cluster replication. + +Detailed documentation: [Replication & Clustering](../replication/clustering.md) + +| Operation | Description | Role Required | +| ----------------------- | --------------------------------------------------------------- | ------------- | +| `add_node` | Adds a Harper instance to the cluster | super_user | +| `update_node` | Modifies an existing node's subscriptions | super_user | +| `remove_node` | Removes a node from the cluster | super_user | +| `cluster_status` | Returns current cluster connection status | super_user | +| `configure_cluster` | Bulk-creates/resets cluster subscriptions across multiple nodes | super_user | +| `cluster_set_routes` | Adds routes to the replication routes config (PATCH/upsert) | super_user | +| `cluster_get_routes` | Returns the current replication routes config | super_user | +| `cluster_delete_routes` | Removes routes from the replication routes config | super_user | + +### `add_node` + +Adds a remote Harper node to the cluster. If `subscriptions` are not provided, a fully replicating cluster is created. Optional fields: `verify_tls`, `authorization`, `retain_authorization`, `revoked_certificates`, `shard`. 
+ +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { "username": "admin", "password": "password" } +} +``` + +### `cluster_status` + +Returns connection state for all cluster nodes, including per-database socket status and replication timing statistics (`lastCommitConfirmed`, `lastReceivedRemoteTime`, `lastReceivedLocalTime`). + +```json +{ "operation": "cluster_status" } +``` + +### `configure_cluster` + +Resets and replaces the entire clustering configuration. Each entry follows the `add_node` schema. + +```json +{ + "operation": "configure_cluster", + "connections": [ + { + "hostname": "server-two", + "subscriptions": [{ "database": "dev", "table": "dog", "subscribe": true, "publish": true }] + } + ] +} +``` + +--- + +## Configuration + +Operations for reading and updating Harper configuration. + +Detailed documentation: [TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview'] + +| Operation | Description | Role Required | +| ------------------- | ---------------------------------------------------------------- | ------------- | +| `set_configuration` | Modifies Harper configuration file parameters (requires restart) | super_user | +| `get_configuration` | Returns the current Harper configuration | super_user | + +### `set_configuration` + +Updates configuration parameters in `harperdb-config.yaml`. A restart (`restart` or `restart_service`) is required for changes to take effect. + +```json +{ + "operation": "set_configuration", + "logging_level": "trace", + "clustering_enabled": true +} +``` + +### `get_configuration` + +Returns the full current configuration object. + +```json +{ "operation": "get_configuration" } +``` + +--- + +## System + +Operations for restarting Harper and managing system state. 
+ +| Operation | Description | Role Required | +| -------------------- | ----------------------------------------------------- | ------------- | +| `restart` | Restarts the Harper instance | super_user | +| `restart_service` | Restarts a specific Harper service | super_user | +| `system_information` | Returns detailed host system metrics | super_user | +| `set_status` | Sets an application-specific status value (in-memory) | super_user | +| `get_status` | Returns a previously set status value | super_user | +| `clear_status` | Removes a status entry | super_user | + +### `restart` + +Restarts all Harper processes. May take up to 60 seconds. + +```json +{ "operation": "restart" } +``` + +### `restart_service` + +Restarts a specific service. `service` must be one of: `http_workers`, `clustering_config`, `clustering`. Supports `"replicated": true` for a rolling cluster restart. + +```json +{ "operation": "restart_service", "service": "http_workers" } +``` + +### `system_information` + +Returns system metrics including CPU, memory, disk, network, and Harper process info. Optionally filter by `attributes` array (e.g., `["cpu", "memory", "replication"]`). + +```json +{ "operation": "system_information" } +``` + +### `set_status` / `get_status` / `clear_status` + +Manage in-memory application status values. Status types: `primary`, `maintenance`, `availability` (availability only accepts `'Available'` or `'Unavailable'`). Status is not persisted across restarts. + +```json +{ "operation": "set_status", "id": "primary", "status": "active" } +``` + +--- + +## Jobs + +Operations for querying background job status. 
+ +Detailed documentation: [Database Jobs](../database/jobs.md) + +| Operation | Description | Role Required | +| --------------------------- | ------------------------------------------------ | ------------- | +| `get_job` | Returns status and results for a specific job ID | any | +| `search_jobs_by_start_date` | Returns jobs within a specified time window | super_user | + +### `get_job` + +Returns job status (`COMPLETE`, `IN_PROGRESS`, `ERROR`), timing, and result message for the specified job ID. Bulk import/export operations return a job ID on initiation. + +```json +{ "operation": "get_job", "id": "4a982782-929a-4507-8794-26dae1132def" } +``` + +### `search_jobs_by_start_date` + +Returns all jobs started within the specified datetime range. + +```json +{ + "operation": "search_jobs_by_start_date", + "from_date": "2021-01-25T22:05:27.464+0000", + "to_date": "2021-01-25T23:05:27.464+0000" +} +``` + +--- + +## Logs + +Operations for reading Harper logs. + +Detailed documentation: [Logging Operations](../logging/operations.md) + +| Operation | Description | Role Required | +| -------------------------------- | ---------------------------------------------------------------------- | ------------- | +| `read_log` | Returns entries from the primary `hdb.log` | super_user | +| `read_transaction_log` | Returns transaction history for a table | super_user | +| `delete_transaction_logs_before` | Deletes transaction log entries older than a timestamp | super_user | +| `read_audit_log` | Returns verbose audit history for a table (requires audit log enabled) | super_user | +| `delete_audit_logs_before` | Deletes audit log entries older than a timestamp | super_user | + +### `read_log` + +Returns entries from `hdb.log`. Filter by `level` (`notify`, `error`, `warn`, `info`, `debug`, `trace`), date range (`from`, `until`), and text `filter`. 
+ +```json +{ + "operation": "read_log", + "start": 0, + "limit": 100, + "level": "error" +} +``` + +### `read_transaction_log` + +Returns transaction history for a specific table. Optionally filter by `from`/`to` (millisecond epoch) and `limit`. + +```json +{ + "operation": "read_transaction_log", + "schema": "dev", + "table": "dog", + "limit": 10 +} +``` + +### `read_audit_log` + +Returns verbose audit history including original record state. Requires `logging.auditLog: true` in configuration. Filter by `search_type`: `hash_value`, `timestamp`, or `username`. + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "username", + "search_values": ["admin"] +} +``` + +--- + +## Certificate Management + +Operations for managing TLS certificates in the `hdb_certificate` system table. + +Detailed documentation: [Certificate Management](../security/certificate-management.md) + +| Operation | Description | Role Required | +| -------------------- | ---------------------------------------------- | ------------- | +| `add_certificate` | Adds or updates a certificate | super_user | +| `remove_certificate` | Removes a certificate and its private key file | super_user | +| `list_certificates` | Lists all certificates | super_user | + +### `add_certificate` + +Adds a certificate to `hdb_certificate`. If a `private_key` is provided, it is written to `/keys/` (not stored in the table). If no private key is provided, the operation searches for a matching one on disk. + +```json +{ + "operation": "add_certificate", + "name": "my-cert", + "certificate": "-----BEGIN CERTIFICATE-----...", + "is_authority": false, + "private_key": "-----BEGIN RSA PRIVATE KEY-----..." +} +``` + +--- + +## Analytics + +Operations for querying analytics metrics. 
+ +Detailed documentation: [Analytics Operations](../analytics/operations.md) + +| Operation | Description | Role Required | +| ----------------- | ----------------------------------------------- | ------------- | +| `get_analytics` | Retrieves analytics data for a specified metric | any | +| `list_metrics` | Lists available analytics metrics | any | +| `describe_metric` | Returns the schema of a specific metric | any | + +### `get_analytics` + +Retrieves analytics data. Supports `start_time`/`end_time` (Unix ms), `get_attributes`, and `conditions` (same format as `search_by_conditions`). + +```json +{ + "operation": "get_analytics", + "metric": "resource-usage", + "start_time": 1769198332754, + "end_time": 1769198532754 +} +``` + +### `list_metrics` + +Returns available metric names. Filter by `metric_types`: `custom`, `builtin` (default: `builtin`). + +```json +{ "operation": "list_metrics" } +``` + +--- + +## Registration & Licensing + +Operations for license management. + +| Operation | Description | Role Required | +| ----------------------- | -------------------------------------------------- | ------------- | +| `registration_info` | Returns registration and version information | any | +| `install_usage_license` | Installs a Harper usage license block | super_user | +| `get_usage_licenses` | Returns all usage licenses with consumption counts | super_user | +| `get_fingerprint` | _(Deprecated)_ Returns the machine fingerprint | super_user | +| `set_license` | _(Deprecated)_ Sets a license key | super_user | + +### `registration_info` + +Returns the instance registration status, version, RAM allocation, and license expiration. + +```json +{ "operation": "registration_info" } +``` + +### `install_usage_license` + +Installs a usage license block. A license is a JWT-like structure (`header.payload.signature`) signed by Harper. Multiple blocks may be installed; earliest blocks are consumed first. 
+ +```json +{ + "operation": "install_usage_license", + "license": "abc...0123.abc...0123.abc...0123" +} +``` + +### `get_usage_licenses` + +Returns all usage licenses (including expired/exhausted) with current consumption counts. Optionally filter by `region`. + +```json +{ "operation": "get_usage_licenses" } +``` + +--- + +## Deprecated Operations + +The following operations are deprecated and should not be used in new code. + +### Custom Functions (Deprecated) + +Custom Functions were the precursor to the Component architecture introduced in v4.2.0. These operations are preserved for backward compatibility. + +Deprecated in: v4.2.0 (moved to legacy in v4.7+) + +For modern equivalents, see [Components Overview](../components/overview.md). + +| Operation | Description | +| --------------------------------- | ------------------------------------------------ | +| `custom_functions_status` | Returns Custom Functions server status | +| `get_custom_functions` | Lists all Custom Function projects | +| `get_custom_function` | Returns a Custom Function file's content | +| `set_custom_function` | Creates or updates a Custom Function file | +| `drop_custom_function` | Deletes a Custom Function file | +| `add_custom_function_project` | Creates a new Custom Function project | +| `drop_custom_function_project` | Deletes a Custom Function project | +| `package_custom_function_project` | Packages a Custom Function project as base64 tar | +| `deploy_custom_function_project` | Deploys a packaged Custom Function project | + +### Other Deprecated Operations + +| Operation | Replaced By | +| ---------------------- | ------------------------------------------------------------------- | +| `install_node_modules` | Handled automatically by `deploy_component` and `restart` | +| `get_fingerprint` | Use `registration_info` | +| `set_license` | Use `install_usage_license` | +| `search_by_hash` | Use `search_by_id` | +| `search_attribute` | Use `attribute` field in `search_by_value` / 
`search_by_conditions` | +| `search_value` | Use `value` field in `search_by_value` / `search_by_conditions` | +| `search_type` | Use `comparator` field in `search_by_conditions` | diff --git a/reference_versioned_docs/version-v4/operations-api/overview.md b/reference_versioned_docs/version-v4/operations-api/overview.md new file mode 100644 index 00000000..969d21ab --- /dev/null +++ b/reference_versioned_docs/version-v4/operations-api/overview.md @@ -0,0 +1,87 @@ +--- +title: Operations API Overview +--- + + + +# Operations API + +The Operations API provides a comprehensive set of capabilities for configuring, deploying, administering, and controlling Harper. It is the primary programmatic interface for all administrative and operational tasks that are not handled through the REST interface. + +## Endpoint + +All Operations API requests are sent as HTTP POST requests to the Operations API endpoint. By default, this listens on port `9925` on the root path: + +``` +POST http://<host>:9925/ +``` + +See [TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview'] for how to change the port and other network settings (`operationsApi.network.port`, `operationsApi.network.securePort`). + +## Request Format + +Each request body must be a JSON object with an `operation` field that identifies the operation to perform: + +```http +POST https://my-harper-server:9925/ +Authorization: Basic YourBase64EncodedUser:Pass +Content-Type: application/json + +{ + "operation": "create_table", + "table": "my-table" +} +``` + +## Authentication + +Operations API requests must be authenticated. Harper supports two authentication methods: + +- **Basic Auth**: Base64-encoded `username:password` in the `Authorization` header. See [Basic Authentication](../security/basic-authentication.md). +- **JWT**: A Bearer token in the `Authorization` header, obtained via `create_authentication_tokens`. See [JWT Authentication](../security/jwt-authentication.md). 
+ +The `create_authentication_tokens` operation itself does not require prior authentication — it accepts a username and password and returns an operation token and refresh token. + +## Example with curl + +```bash +curl --location --request POST 'https://my-harper-server:9925/' \ + --header 'Authorization: Basic YourBase64EncodedUser:Pass' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "operation": "create_table", + "table": "my-table" + }' +``` + +## Authorization + +Most operations are restricted to `super_user` roles. This is noted in the documentation for each operation. Some operations (such as `user_info`, `get_job`, and `create_authentication_tokens`) are available to all authenticated users. + +## Operations Reference + +Operations are grouped by topic. See [Operations](./operations.md) for the complete reference list. + +**Topic categories:** + +| Category | Description | Detailed Docs | +| ------------------------------------------------------------------- | -------------------------------------------------------------- | --------------------------------------------------------------- | +| [Databases & Tables](./operations.md#databases--tables) | Create and manage databases, tables, and attributes | [Database Overview](../database/overview.md) | +| [NoSQL Operations](./operations.md#nosql-operations) | Insert, update, upsert, delete, and query records | [REST Querying Reference](../rest/querying.md) | +| [Bulk Operations](./operations.md#bulk-operations) | CSV/S3 import and export, batch delete | [Database Jobs](../database/jobs.md) | +| [SQL Operations](./operations.md#sql-operations) | Execute SQL statements (use for investigation, not production) | — | +| [Users & Roles](./operations.md#users--roles) | Manage users and role-based access control | [Users & Roles Operations](../users-and-roles/operations.md) | +| [Token Authentication](./operations.md#token-authentication) | Create and refresh JWT tokens | [JWT 
Authentication](../security/jwt-authentication.md) | +| [Components](./operations.md#components) | Deploy and manage Harper components | [Components Overview](../components/overview.md) | +| [Replication & Clustering](./operations.md#replication--clustering) | Configure cluster topology and replication | [Replication & Clustering](../replication/clustering.md) | +| [Configuration](./operations.md#configuration) | Read and update Harper configuration | — | +| [System](./operations.md#system) | Restart, system information, status management | — | +| [Jobs](./operations.md#jobs) | Query background job status | [Database Jobs](../database/jobs.md) | +| [Logs](./operations.md#logs) | Read standard, transaction, and audit logs | [Logging Operations](../logging/operations.md) | +| [Certificate Management](./operations.md#certificate-management) | Manage TLS certificates | [Certificate Management](../security/certificate-management.md) | +| [Analytics](./operations.md#analytics) | Query analytics metrics | [Analytics Operations](../analytics/operations.md) | +| [Registration & Licensing](./operations.md#registration--licensing) | License management | — | + +## Past Release API Documentation + +For API documentation prior to v4.0, see [olddocs.harperdb.io](https://olddocs.harperdb.io). 
diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 7513afcc..4e6aac99 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -431,6 +431,24 @@ } ] }, + { + "type": "category", + "label": "Operations API", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "operations-api/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "operations-api/operations", + "label": "Operations" + } + ] + }, { "type": "category", "label": "Legacy", From ffc57e0d2bdf4b811d951d1a6015486433727549 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 11:04:22 -0600 Subject: [PATCH 30/51] Configuration Section Migration (#463) * docs: migrate Configuration section to v4 consolidated reference Co-Authored-By: Claude Sonnet 4.6 * manual review --------- Co-authored-by: Claude Sonnet 4.6 --- .../configuration-link-placeholders.md | 61 ++++ .../version-v4/configuration/operations.md | 141 ++++++++ .../version-v4/configuration/options.md | 319 ++++++++++++++++++ .../version-v4/configuration/overview.md | 209 ++++++++++++ .../version-v4-sidebars.json | 23 ++ 5 files changed, 753 insertions(+) create mode 100644 migration-context/link-placeholders/configuration-link-placeholders.md create mode 100644 reference_versioned_docs/version-v4/configuration/operations.md create mode 100644 reference_versioned_docs/version-v4/configuration/options.md create mode 100644 reference_versioned_docs/version-v4/configuration/overview.md diff --git a/migration-context/link-placeholders/configuration-link-placeholders.md b/migration-context/link-placeholders/configuration-link-placeholders.md new file mode 100644 index 00000000..113520bd --- /dev/null +++ b/migration-context/link-placeholders/configuration-link-placeholders.md @@ -0,0 +1,61 @@ +# Link Placeholders for Configuration 
+ +## reference_versioned_docs/version-v4/configuration/options.md + +- Line (http.mtls section): `[mTLS Authentication](TODO:reference_versioned_docs/version-v4/security/mtls-authentication.md)` + - Context: Referencing full mTLS authentication reference from HTTP mTLS config sub-section + - Target should be: `../security/mtls-authentication.md` + +- Line (http.mtls.certificateVerification): `[Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md)` + - Context: Referencing certificate revocation checking (CRL/OCSP) documentation + - Target should be: `../security/certificate-verification.md` + +- Line (tls section): `[TLS](TODO:reference_versioned_docs/version-v4/http/tls.md)` + - Context: Linking to TLS configuration reference in the HTTP section + - Target should be: `../http/tls.md` + +- Line (tls section): `[Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md)` + - Context: Linking to certificate management operations + - Target should be: `../security/certificate-management.md` + +- Line (mqtt section): `[MQTT Configuration](TODO:reference_versioned_docs/version-v4/mqtt/configuration.md)` + - Context: Referencing full MQTT configuration reference + - Target should be: `../mqtt/configuration.md` + +- Line (logging section): `[logger API](TODO:reference_versioned_docs/version-v4/logging/api.md)` + - Context: `logging.external` section referencing the logger API for components + - Target should be: `../logging/api.md` + +- Line (replication section): `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` + - Context: Referencing replication overview from replication config section + - Target should be: `../replication/overview.md` + +- Line (replication section): `[Clustering](TODO:reference_versioned_docs/version-v4/replication/clustering.md)` + - Context: Referencing clustering reference from replication config section + - Target should 
be: `../replication/clustering.md` + +- Line (replication.shard): `[Sharding](TODO:reference_versioned_docs/version-v4/replication/sharding.md)` + - Context: Referencing sharding documentation for `replication.shard` config option + - Target should be: `../replication/sharding.md` + +- Line (storage.compactOnStart): `[Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md)` + - Context: Referencing compaction documentation from `compactOnStart` storage option + - Target should be: `../database/compaction.md` + +- Line (localStudio section): `[Studio](TODO:reference_versioned_docs/version-v4/studio/overview.md)` + - Context: Referencing Studio overview from `localStudio` config section + - Target should be: `../studio/overview.md` + +- Line (Components section): `[Components](TODO:reference_versioned_docs/version-v4/components/overview.md)` + - Context: Referencing components overview from component config section + - Target should be: `../components/overview.md` + +## reference_versioned_docs/version-v4/configuration/operations.md + +- Line (set_configuration description): `[restart](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart)` + - Context: Linking to the restart system operation needed after set_configuration + - Target should be: `../operations-api/operations.md#restart` + +- Line (set_configuration description): `[restart_service](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart-service)` + - Context: Linking to the restart_service operation needed after set_configuration + - Target should be: `../operations-api/operations.md#restart-service` diff --git a/reference_versioned_docs/version-v4/configuration/operations.md b/reference_versioned_docs/version-v4/configuration/operations.md new file mode 100644 index 00000000..23361381 --- /dev/null +++ b/reference_versioned_docs/version-v4/configuration/operations.md @@ -0,0 +1,141 @@ +--- +title: Configuration Operations +--- + + + +# 
Configuration Operations + +Operations API endpoints for reading and modifying Harper configuration. + +_All operations in this section are restricted to `super_user` roles._ + +For the full list of configurable options, see [Configuration Options](./options.md). + +--- + +## Set Configuration + +Modifies one or more Harper configuration parameters. **Requires a [restart](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart 'restart operation') or [restart_service](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart-service 'restart_service operation') to take effect.** + +`operation` _(required)_ — must be `set_configuration` + +Additional properties correspond to configuration keys in underscore-separated format (e.g. `logging_level` for `logging.level`, `clustering_enabled` for `clustering.enabled`). + +### Body + +```json +{ + "operation": "set_configuration", + "logging_level": "trace", + "clustering_enabled": true +} +``` + +### Response: 200 + +```json +{ + "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." +} +``` + +--- + +## Get Configuration + +Returns the current Harper configuration. 
+ +`operation` _(required)_ — must be `get_configuration` + +### Body + +```json +{ + "operation": "get_configuration" +} +``` + +### Response: 200 + +```json +{ + "http": { + "compressionThreshold": 1200, + "cors": false, + "corsAccessList": [null], + "keepAliveTimeout": 30000, + "port": 9926, + "securePort": null, + "timeout": 120000 + }, + "threads": 11, + "authentication": { + "cacheTTL": 30000, + "enableSessions": true, + "operationTokenTimeout": "1d", + "refreshTokenTimeout": "30d" + }, + "analytics": { + "aggregatePeriod": 60 + }, + "replication": { + "hostname": "node1", + "databases": "*", + "routes": null, + "url": "wss://127.0.0.1:9925" + }, + "componentsRoot": "/Users/hdb/components", + "localStudio": { + "enabled": false + }, + "logging": { + "auditAuthEvents": { + "logFailed": false, + "logSuccessful": false + }, + "auditLog": true, + "auditRetention": "3d", + "file": true, + "level": "error", + "root": "/Users/hdb/log", + "rotation": { + "enabled": false, + "compress": false, + "interval": null, + "maxSize": null, + "path": "/Users/hdb/log" + }, + "stdStreams": false + }, + "mqtt": { + "network": { + "port": 1883, + "securePort": 8883 + }, + "webSocket": true, + "requireAuthentication": true + }, + "operationsApi": { + "network": { + "cors": true, + "corsAccessList": ["*"], + "domainSocket": "/Users/hdb/operations-server", + "port": 9925, + "securePort": null + } + }, + "rootPath": "/Users/hdb", + "storage": { + "writeAsync": false, + "caching": true, + "compression": false, + "noReadAhead": true, + "path": "/Users/hdb/database", + "prefetchWrites": true + }, + "tls": { + "privateKey": "/Users/hdb/keys/privateKey.pem" + } +} +``` diff --git a/reference_versioned_docs/version-v4/configuration/options.md b/reference_versioned_docs/version-v4/configuration/options.md new file mode 100644 index 00000000..687e4cf2 --- /dev/null +++ b/reference_versioned_docs/version-v4/configuration/options.md @@ -0,0 +1,319 @@ +--- +title: Configuration Options +--- + + 
+ + + + + + +# Configuration Options + +Quick reference for all `harperdb-config.yaml` top-level sections. + +For how to apply configuration (YAML file, environment variables, CLI, Operations API), see [Configuration Overview](./overview.md). + +--- + +## `http` + +Configures the Harper component server (HTTP, REST API, WebSocket). See [HTTP Configuration](../http/configuration.md) for full details. + +```yaml +http: + port: 9926 + securePort: 4443 + cors: true + timeout: 120000 + mtls: false + logging: + level: info + path: ~/hdb/log/http.log +``` + +- `sessionAffinity` — Route requests from same client to same worker thread (`ip` or header name) +- `compressionThreshold` — Response size threshold for Brotli compression; _Default_: `1200` (bytes) +- `cors` — Enable CORS; _Default_: `true` +- `corsAccessList` — Allowed domains for CORS requests +- `corsAccessControlAllowHeaders` — `Access-Control-Allow-Headers` value for OPTIONS preflight +- `headersTimeout` — Max wait for complete HTTP headers (ms); _Default_: `60000` +- `maxHeaderSize` — Max HTTP header size (bytes); _Default_: `16394` +- `requestQueueLimit` — Max estimated request queue time (ms) before 503; _Default_: `20000` +- `keepAliveTimeout` — Inactivity before closing keep-alive connection (ms); _Default_: `30000` +- `port` — HTTP port; _Default_: `9926` +- `securePort` — HTTPS port; requires [TLS configuration](../http/tls.md); _Default_: `null` +- `http2` — Enable HTTP/2; _Default_: `false` (Added in: v4.5.0) +- `timeout` — Request timeout (ms); _Default_: `120000` +- `mtls` — Enable [mTLS authentication](../security/mtls-authentication.md) for incoming connections; sub-options: `user`, `required`, `certificateVerification` (see [Certificate Verification](../security/certificate-verification.md)) +- `logging` — HTTP request logging (disabled by default, Added in: v4.6.0); sub-options: `level`, `path`, `timing`, `headers`, `id`. 
See [Logging Configuration](../logging/configuration.md) + +--- + +## `threads` + +Worker thread pool configuration. + +```yaml +threads: + count: 11 + maxHeapMemory: 300 +``` + +- `count` — Number of worker threads; _Default_: CPU count minus one +- `maxHeapMemory` — Heap limit per thread (MB) +- `heapSnapshotNearLimit` — Take heap snapshot when approaching limit +- `debug` — Enable debugging; sub-options: `port`, `startingPort`, `host`, `waitForDebugger` + +--- + +## `authentication` + +Authentication and session configuration. Added in: v4.1.0; `enableSessions` added in v4.2.0. See [Authentication Configuration](../security/configuration.md). + +```yaml +authentication: + authorizeLocal: true + cacheTTL: 30000 + enableSessions: true + operationTokenTimeout: 1d + refreshTokenTimeout: 30d +``` + +- `authorizeLocal` — Auto-authorize loopback requests as superuser; _Default_: `true` +- `cacheTTL` — Session cache duration (ms); _Default_: `30000` +- `enableSessions` — Cookie-based sessions; _Default_: `true` +- `operationTokenTimeout` — Access token lifetime; _Default_: `1d` +- `refreshTokenTimeout` — Refresh token lifetime; _Default_: `30d` +- `logging` — Authentication event logging (Added in: v4.6.0); sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `operationsApi` + +Harper Operations API endpoint configuration. See [Operations API Overview](../operations-api/overview.md). 
+ +```yaml +operationsApi: + network: + port: 9925 + cors: true + tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +- `network.cors` / `network.corsAccessList` — CORS settings +- `network.domainSocket` — Unix socket path for CLI communication; _Default_: `<ROOTPATH>/operations-server` +- `network.headersTimeout` / `network.keepAliveTimeout` / `network.timeout` — Timeout settings (ms) +- `network.port` — Operations API port; _Default_: `9925` +- `network.securePort` — HTTPS port; _Default_: `null` +- `tls` — TLS override for the Operations API; sub-options: `certificate`, `certificateAuthority`, `privateKey`. See [`tls`](#tls) + +--- + +## `tls` + +Global TLS configuration for HTTPS and TLS sockets (used by HTTP and MQTT). Can be a single object or an array for SNI. See [TLS](../http/tls.md) and [Certificate Management](../security/certificate-management.md). + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +- `certificate` — Path to TLS certificate; _Default_: `<ROOTPATH>/keys/certificate.pem` +- `certificateAuthority` — Path to CA file; _Default_: `<ROOTPATH>/keys/ca.pem` +- `privateKey` — Path to private key; _Default_: `<ROOTPATH>/keys/privateKey.pem` +- `ciphers` — Allowed TLS cipher suites + +--- + +## `mqtt` + +MQTT protocol configuration. Added in: v4.2.0. See [MQTT Configuration](../mqtt/configuration.md). 
+ +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + webSocket: true + requireAuthentication: true +``` + +- `network.port` — Insecure MQTT port; _Default_: `1883` +- `network.securePort` — Secure MQTT port; _Default_: `8883` +- `network.mtls` — Enable [mTLS](../security/mtls-authentication.md) for MQTT connections; sub-options: `user`, `required`, `certificateAuthority`, `certificateVerification` +- `webSocket` — Enable MQTT over WebSocket on HTTP port; _Default_: `true` +- `requireAuthentication` — Require credentials or mTLS; _Default_: `true` +- `logging` — MQTT event logging (Added in: v4.6.0); sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `logging` + +Application logging. Added in: v4.1.0; per-component logging added in v4.6.0. See [Logging Configuration](../logging/configuration.md). + +```yaml +logging: + level: warn + root: ~/hdb/log + stdStreams: false + auditLog: false + rotation: + interval: 1D + maxSize: 100M +``` + +- `level` — Log verbosity (`trace` → `debug` → `info` → `warn` → `error` → `fatal` → `notify`); _Default_: `warn` +- `file` — Write to file; _Default_: `true` +- `root` — Log directory; _Default_: `<ROOTPATH>/log` +- `path` — Explicit log file path (overrides `root`) +- `stdStreams` — Write to stdout/stderr; _Default_: `false` +- `console` — Include `console.*` output; _Default_: `true` +- `auditLog` — Enable table transaction audit logging; _Default_: `false` +- `auditRetention` — Audit log retention duration; _Default_: `3d` +- `external` — Logging for components using the logger API; sub-options: `level`, `path` +- `rotation.enabled` / `rotation.compress` / `rotation.interval` / `rotation.maxSize` / `rotation.path` — Log file rotation (activates when `interval` or `maxSize` is set) +- `auditAuthEvents.logFailed` / `auditAuthEvents.logSuccessful` — Log failed/successful authentication events; _Default_: `false` + +--- + +## `replication` + +Native WebSocket-based 
replication (Plexus). Added in: v4.4.0. See [Replication](../replication/overview.md) and [Clustering](../replication/clustering.md). + +```yaml +replication: + hostname: server-one + url: wss://server-one:9933 + databases: '*' + routes: + - wss://server-two:9933 +``` + +- `hostname` — This instance's hostname within the cluster +- `url` — WebSocket URL peers use to connect to this instance +- `databases` — Databases to replicate; _Default_: `"*"` (all). Each entry supports `name` and `sharded` +- `routes` — Peer nodes; URL strings or `{hostname, port, startTime, revokedCertificates}` objects +- `port` — Replication port +- `securePort` — Secure replication port; _Default_: `9933` (changed from `9925` in v4.5.0) +- `enableRootCAs` — Verify against Node.js Mozilla CA store; _Default_: `true` +- `blobTimeout` — Blob transfer timeout (ms); _Default_: `120000` +- `failOver` — Failover to alternate node if peer unreachable; _Default_: `true` +- `shard` — Shard ID for traffic routing; see [Sharding](../replication/sharding.md) +- `mtls.certificateVerification` — Certificate revocation checking (CRL/OCSP) for replication connections; see [Certificate Verification](../security/certificate-verification.md) +- `logging` — Replication event logging; sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `storage` + +Database storage configuration. See [Database Overview](../database/overview.md) and [Compaction](../database/compaction.md). + +```yaml +storage: + path: ~/hdb/database + caching: true + compression: true + compactOnStart: false +``` + +- `writeAsync` — Disable disk sync for higher throughput (**disables durability guarantees**); _Default_: `false` +- `caching` — In-memory record caching; _Default_: `true` +- `compression` — LZ4 record compression; _Default_: `true` (enabled by default since v4.3.0). 
Sub-options: `dictionary`, `threshold` +- `compactOnStart` — Compact all non-system databases on startup; _Default_: `false` (Added in: v4.3.0) +- `compactOnStartKeepBackup` — Retain compaction backups; _Default_: `false` +- `maxTransactionQueueTime` — Max write queue time before 503; _Default_: `45s` +- `noReadAhead` — Advise OS against read-ahead; _Default_: `false` +- `prefetchWrites` — Prefetch before write transactions; _Default_: `true` +- `path` — Database files directory; _Default_: `<ROOTPATH>/database` +- `blobPaths` — Blob storage directory or directories; _Default_: `<ROOTPATH>/blobs` (Added in: v4.5.0) +- `pageSize` — Database page size (bytes); _Default_: OS default +- `reclamation.threshold` / `reclamation.interval` / `reclamation.evictionFactor` — Background storage reclamation settings (Added in: v4.5.0) + +--- + +## `databases` + +Per-database and per-table file path overrides. Must be set before the database/table is created. See [Database Overview](../database/overview.md). + +```yaml +databases: + myDatabase: + path: /data/myDatabase + auditPath: /data/myDatabase-audit + tables: + myTable: + path: /data/myTable +``` + +- `<database>.path` — Database files directory +- `<database>.auditPath` — Audit log directory for this database +- `<database>.tables.<table>.path` — Table files directory + +--- + +## `analytics` + +Analytics aggregation configuration. See [Analytics Overview](../analytics/overview.md). + +```yaml +analytics: + aggregatePeriod: 60 + replicate: false +``` + +- `aggregatePeriod` — Aggregation interval (seconds); _Default_: `60` (Added in: v4.5.0) +- `replicate` — Replicate analytics data across cluster; _Default_: `false` + +--- + +## `localStudio` + +Local Harper Studio GUI. See [Studio](../studio/overview.md). + +```yaml +localStudio: + enabled: true +``` + +- `enabled` — Enable local Studio at `http://localhost:<port>`; _Default_: `false` + +--- + +## `componentsRoot` + +Path to local component files. Added in: v4.2.0 (previously `customFunctionsRoot`). 
See [Components](../components/overview.md). + +```yaml +componentsRoot: ~/hdb/components +``` + +--- + +## `rootPath` + +Root directory for all Harper persistent data, config, logs, and components. + +```yaml +rootPath: /var/lib/harper +``` + +--- + +## Component Configuration + +Installed components are configured directly at the root of `harperdb-config.yaml` using the component name as the key — not nested under a `components:` section. See [Components](../components/overview.md). + +```yaml +my-component: + package: 'HarperDB-Add-Ons/my-component' + port: 4321 +``` + +- `<component-name>.package` — NPM package name, GitHub repo (`user/repo`), or local path +- `<component-name>.port` — Port for the component; _Default_: value of `http.port` diff --git a/reference_versioned_docs/version-v4/configuration/overview.md b/reference_versioned_docs/version-v4/configuration/overview.md new file mode 100644 index 00000000..747e681c --- /dev/null +++ b/reference_versioned_docs/version-v4/configuration/overview.md @@ -0,0 +1,209 @@ +--- +title: Configuration Overview +--- + + + + +# Configuration + +Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory. By default the root directory is a folder named `hdb` in the home directory of the current user. + +Some configuration values are pre-populated in the config file on install, regardless of whether they are used. + +For a complete reference of all available configuration options, see [Configuration Options](./options.md). + +--- + +## The Configuration File + +To change a configuration value, edit `harperdb-config.yaml` and save. **Harper must be restarted for changes to take effect.** + +Configuration keys use camelCase (e.g. `operationsApi`). Nested keys use dot notation conceptually (e.g. `operationsApi.network.port`). + +--- + +## Setting Configuration Values + +All configuration values can be set through four mechanisms: + +### 1. 
YAML File (direct edit) + +Edit `harperdb-config.yaml` directly: + +```yaml +http: + port: 9926 +logging: + level: warn +``` + +### 2. Environment Variables + +Map YAML keys to `SCREAMING_SNAKE_CASE`. Use underscores for nesting. Keys are case-insensitive. + +Examples: + +- `http.port` → `HTTP_PORT=9926` +- `logging.rotation.enabled` → `LOGGING_ROTATION_ENABLED=false` +- `operationsApi.network.port` → `OPERATIONSAPI_NETWORK_PORT=9925` + +```bash +HTTP_PORT=9926 harperdb +``` + +> **Note:** Component configuration cannot be set via environment variables or CLI arguments. + +### 3. CLI Arguments + +Same naming convention as environment variables, prefixed with `--`: + +```bash +harperdb --HTTP_PORT 9926 --LOGGING_LEVEL warn +``` + +### 4. Operations API + +Use `set_configuration` with underscore-separated key paths: + +```json +{ + "operation": "set_configuration", + "http_port": 9926, + "logging_level": "warn" +} +``` + +See [Configuration Operations](./operations.md) for the full `set_configuration` and `get_configuration` API reference. + +--- + +## Custom Config File Path + +To specify a custom config file location at install time, use the `HDB_CONFIG` variable: + +```bash +# Use a custom config file path +HDB_CONFIG=/path/to/custom-config.yaml harperdb + +# Install over an existing config +HDB_CONFIG=/existing/rootpath/harperdb-config.yaml harperdb +``` + +--- + +## Environment Variable-Based Configuration + +Added in: v4.7.2 + +Harper provides two special environment variables for managing configuration across deployments: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. Both accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`. + +```bash +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' +export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' +``` + +### HARPER_DEFAULT_CONFIG + +Provides default configuration values while respecting user modifications. 
Ideal for supplying sensible defaults without preventing administrators from customizing their instances. + +**At installation time:** + +- Overrides template default values +- Respects values set by `HARPER_SET_CONFIG` +- Respects values from existing config files (when using `HDB_CONFIG`) + +**At runtime:** + +- Only updates values it originally set +- Detects and respects manual user edits to the config file +- When a key is removed from the variable, the original value is restored + +**Example:** + +```bash +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' +harperdb + +# If an administrator manually changes the port to 9000, Harper will +# detect this edit and respect it on subsequent restarts. + +# If http.port is removed from HARPER_DEFAULT_CONFIG later, +# the port reverts to the original template default (9926). +``` + +### HARPER_SET_CONFIG + +Forces configuration values that cannot be overridden by user edits. Designed for security policies, compliance requirements, or critical operational settings. + +**At runtime:** + +- Always overrides all other configuration sources +- Takes precedence over user edits, file values, and `HARPER_DEFAULT_CONFIG` +- When a key is removed from the variable, it is deleted from the config (not restored) + +**Example:** + +```bash +export HARPER_SET_CONFIG='{"authentication":{"enabled":true},"logging":{"level":"error","stdStreams":true}}' +harperdb + +# Any change to these values in harperdb-config.yaml will be +# overridden on the next restart. +``` + +### Combining Both Variables + +```bash +# Provide sensible defaults (can be overridden by admins) +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080,"cors":true},"logging":{"level":"info"}}' + +# Enforce critical settings (cannot be changed) +export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' +``` + +### Configuration Precedence + +From highest to lowest: + +1. **`HARPER_SET_CONFIG`** — Always wins +2. 
**User manual edits** — Detected via drift detection +3. **`HARPER_DEFAULT_CONFIG`** — Applied if no user edits detected +4. **File defaults** — Original template values + +### State Tracking + +Harper maintains a state file at `{rootPath}/backup/.harper-config-state.json` to track the source of each configuration value. This enables: + +- **Drift detection**: Identifying when users manually edit values set by `HARPER_DEFAULT_CONFIG` +- **Restoration**: Restoring original values when keys are removed from `HARPER_DEFAULT_CONFIG` +- **Conflict resolution**: Determining which source should take precedence + +### Format Reference + +The JSON structure mirrors the YAML config file: + +**YAML:** + +```yaml +http: + port: 8080 + cors: true +logging: + level: info + rotation: + enabled: true +``` + +**Environment variable (JSON):** + +```json +{ "http": { "port": 8080, "cors": true }, "logging": { "level": "info", "rotation": { "enabled": true } } } +``` + +### Important Notes + +- Both variables must contain valid JSON matching the structure of `harperdb-config.yaml` +- Invalid values are caught by Harper's configuration validator at startup +- Changes to these variables require a Harper restart to take effect +- The state file is per-instance (stored in the root path) diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 4e6aac99..1fb38856 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -33,6 +33,29 @@ } ] }, + { + "type": "category", + "label": "Configuration", + "collapsible": false, + "className": "learn-category-header", + "items": [ + { + "type": "doc", + "id": "configuration/overview", + "label": "Overview" + }, + { + "type": "doc", + "id": "configuration/options", + "label": "Options" + }, + { + "type": "doc", + "id": "configuration/operations", + "label": "Operations" + } + ] + }, { "type": "category", 
"label": "Environment Variables", From 92ef6d5bc29c4b387261f6ab1fc6f6152d2dacb8 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Fri, 27 Mar 2026 11:23:02 -0600 Subject: [PATCH 31/51] update plan docs and add custom functions and sql pages --- .../version-v4/database/sql.md | 345 ++++++++++++++++++ .../version-v4/legacy/custom-functions.md | 13 + .../version-v4-sidebars.json | 10 + v4-docs-implementation-plan.md | 25 +- v4-docs-migration-map.md | 54 +-- 5 files changed, 412 insertions(+), 35 deletions(-) create mode 100644 reference_versioned_docs/version-v4/database/sql.md create mode 100644 reference_versioned_docs/version-v4/legacy/custom-functions.md diff --git a/reference_versioned_docs/version-v4/database/sql.md b/reference_versioned_docs/version-v4/database/sql.md new file mode 100644 index 00000000..d27fec67 --- /dev/null +++ b/reference_versioned_docs/version-v4/database/sql.md @@ -0,0 +1,345 @@ +--- +title: SQL +--- + + + + + + + + + + +:::warning +SQL querying is not recommended for production use or on large tables. SQL queries often do not utilize indexes and are not optimized for performance. Use the [REST interface](../rest/overview.md) for production data access — it provides a more stable, secure, and performant interface. SQL is intended for ad-hoc data investigation and administrative queries. +::: + +Harper includes a SQL interface supporting SELECT, INSERT, UPDATE, and DELETE operations. Tables are referenced using `database.table` notation (e.g., `dev.dog`). 
+ +## Operations API + +SQL queries are executed via the Operations API using the `sql` operation: + +- `operation` _(required)_ — must be `sql` +- `sql` _(required)_ — the SQL statement to execute + +### Select + +```json +{ + "operation": "sql", + "sql": "SELECT * FROM dev.dog WHERE id = 1" +} +``` + +### Insert + +```json +{ + "operation": "sql", + "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" +} +``` + +Response: + +```json +{ + "message": "inserted 1 of 1 records", + "inserted_hashes": [22], + "skipped_hashes": [] +} +``` + +### Update + +```json +{ + "operation": "sql", + "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" +} +``` + +### Delete + +```json +{ + "operation": "sql", + "sql": "DELETE FROM dev.dog WHERE id = 1" +} +``` + +--- + +## SELECT Syntax + +```sql +SELECT * FROM dev.dog +SELECT id, dog_name, age FROM dev.dog +SELECT * FROM dev.dog ORDER BY age +SELECT * FROM dev.dog ORDER BY age DESC +SELECT DISTINCT breed_id FROM dev.dog +SELECT COUNT(*) FROM dev.dog WHERE age > 3 +``` + +### Joins + +Supported join types: `INNER JOIN`, `LEFT [OUTER] JOIN`, `RIGHT [OUTER] JOIN`, `FULL OUTER JOIN`, `CROSS JOIN`. 
+ +```sql +SELECT d.id, d.dog_name, b.name +FROM dev.dog AS d +INNER JOIN dev.breed AS b ON d.breed_id = b.id +WHERE d.owner_name IN ('Kyle', 'Zach') +ORDER BY d.dog_name +``` + +--- + +## Features Matrix + +| INSERT | | +| ---------------------------------- | --- | +| Values — multiple values supported | ✔ | +| Sub-SELECT | ✗ | + +| UPDATE | | +| -------------- | --- | +| SET | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | +| Date Functions | ✔ | +| Math Functions | ✔ | + +| DELETE | | +| ---------- | --- | +| FROM | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | + +| SELECT | | +| ------------------- | --- | +| Column SELECT | ✔ | +| Aliases | ✔ | +| Aggregate Functions | ✔ | +| Date Functions | ✔ | +| Math Functions | ✔ | +| Constant Values | ✔ | +| DISTINCT | ✔ | +| Sub-SELECT | ✗ | + +| FROM | | +| ---------------- | --- | +| Multi-table JOIN | ✔ | +| INNER JOIN | ✔ | +| LEFT OUTER JOIN | ✔ | +| LEFT INNER JOIN | ✔ | +| RIGHT OUTER JOIN | ✔ | +| RIGHT INNER JOIN | ✔ | +| FULL JOIN | ✔ | +| UNION | ✗ | +| Sub-SELECT | ✗ | +| TOP | ✔ | + +| WHERE | | +| ---------------- | --- | +| Multi-Conditions | ✔ | +| Wildcards | ✔ | +| IN | ✔ | +| LIKE | ✔ | +| AND, OR, NOT | ✔ | +| NULL | ✔ | +| BETWEEN | ✔ | +| EXISTS, ANY, ALL | ✔ | +| Compare columns | ✔ | +| Date Functions | ✔ | +| Sub-SELECT | ✗ | + +| GROUP BY | | +| --------------------- | --- | +| Multi-Column GROUP BY | ✔ | + +| HAVING | | +| ----------------------------- | --- | +| Aggregate function conditions | ✔ | + +| ORDER BY | | +| --------------------- | --- | +| Multi-Column ORDER BY | ✔ | +| Aliases | ✔ | + +--- + +## Functions + +### Aggregate + +| Function | Description | +| ---------------------- | ----------------------------------------------------- | +| `AVG(expr)` | Average of a numeric expression. | +| `COUNT(col)` | Count of rows matching the criteria (nulls excluded). | +| `MAX(col)` | Largest value in a column. | +| `MIN(col)` | Smallest value in a column. | +| `SUM(col)` | Sum of numeric values. 
| +| `GROUP_CONCAT(expr)` | Comma-separated string of non-null values. | +| `ARRAY(expr)` | Returns a list of data as a field. | +| `DISTINCT_ARRAY(expr)` | Returns a deduplicated list. | + +### Conversion + +| Function | Description | +| ---------------------------------- | ------------------------------------------ | +| `CAST(expr AS datatype)` | Converts a value to the specified type. | +| `CONVERT(datatype, expr[, style])` | Converts a value from one type to another. | + +### String + +| Function | Description | +| ----------------------------- | ------------------------------------------------------- | +| `CONCAT(s1, s2, ...)` | Joins strings together. | +| `CONCAT_WS(sep, s1, s2, ...)` | Joins strings with a separator. | +| `INSTR(s1, s2)` | Position of s2 within s1. | +| `LEN(s)` | Length of a string. | +| `LOWER(s)` | Converts to lower-case. | +| `UPPER(s)` | Converts to upper-case. | +| `REPLACE(s, old, new)` | Replaces all instances of old with new. | +| `SUBSTRING(s, pos, len)` | Extracts a substring. | +| `TRIM([chars FROM] s)` | Removes leading and trailing spaces or specified chars. | +| `REGEXP pattern` | Matches a regular expression pattern. | +| `REGEXP_LIKE(col, pattern)` | Matches a regular expression pattern (function form). | + +### Mathematical + +| Function | Description | +| ------------------ | --------------------------------------- | +| `ABS(expr)` | Absolute value. | +| `CEIL(n)` | Smallest integer ≥ n. | +| `FLOOR(n)` | Largest integer ≤ n. | +| `EXP(n)` | e to the power of n. | +| `ROUND(n, places)` | Rounds to the specified decimal places. | +| `SQRT(expr)` | Square root. | +| `RANDOM(seed)` | Pseudo-random number. | + +### Logical + +| Function | Description | +| -------------------------------- | ------------------------------------------------------- | +| `IF(cond, true_val, false_val)` | Returns one of two values based on a condition. | +| `IIF(cond, true_val, false_val)` | Alias for IF. 
| +| `IFNULL(expr, alt)` | Returns alt if expr is null. | +| `NULLIF(expr1, expr2)` | Returns null if expr1 = expr2, otherwise returns expr1. | + +--- + +## Date & Time Functions + +All SQL date operations use UTC internally. Dates are parsed as [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), then [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3), then `new Date(string)`. + +| Function | Returns | +| ------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `CURRENT_DATE()` | Current date as `YYYY-MM-DD`. | +| `CURRENT_TIME()` | Current time as `HH:mm:ss.SSS`. | +| `CURRENT_TIMESTAMP` | Current Unix timestamp in milliseconds. | +| `NOW()` | Current Unix timestamp in milliseconds. | +| `GETDATE()` | Current Unix timestamp in milliseconds. | +| `GET_SERVER_TIME()` | Current date/time in server's timezone as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. | +| `DATE([date_string])` | Date formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. | +| `DATE_ADD(date, value, interval)` | Adds time to a date; returns Unix ms. | +| `DATE_SUB(date, value, interval)` | Subtracts time from a date; returns Unix ms. | +| `DATE_DIFF(date1, date2[, interval])` | Difference between two dates. | +| `DATE_FORMAT(date, format)` | Formats a date using [moment.js format strings](https://momentjs.com/docs/#/displaying/format/). | +| `EXTRACT(date, date_part)` | Extracts a part (year, month, day, hour, minute, second, millisecond). | +| `OFFSET_UTC(date, offset)` | Returns the date adjusted by offset minutes (or hours if < 16). | +| `DAY(date)` | Day of the month. | +| `DAYOFWEEK(date)` | Day of the week (0=Sunday … 6=Saturday). | +| `HOUR(datetime)` | Hour part (0–838). | +| `MINUTE(datetime)` | Minute part (0–59). | +| `MONTH(date)` | Month (1–12). | +| `SECOND(datetime)` | Seconds part (0–59). | +| `YEAR(date)` | Year. 
| + +`DATE_ADD` and `DATE_SUB` accept these interval values: + +| Key | Shorthand | +| ------------ | --------- | +| years | y | +| quarters | Q | +| months | M | +| weeks | w | +| days | d | +| hours | h | +| minutes | m | +| seconds | s | +| milliseconds | ms | + +--- + +## JSON Search + +`SEARCH_JSON(expression, attribute)` queries nested JSON data that is not indexed by Harper. It uses the [JSONata](https://docs.jsonata.org/overview.html) library and works in both SELECT and WHERE clauses. + +```sql +-- Find records where the name array contains "Harper" +SELECT * FROM dev.dog +WHERE SEARCH_JSON('"Harper" in *', name) +``` + +```sql +-- Select and filter nested JSON in one query +SELECT m.title, + SEARCH_JSON($[name in ["Actor A", "Actor B"]].{"actor": name}, c.`cast`) AS cast +FROM movies.credits c +INNER JOIN movies.movie m ON c.movie_id = m.id +WHERE SEARCH_JSON($count($[name in ["Actor A", "Actor B"]]), c.`cast`) >= 2 +``` + +--- + +## Geospatial Functions + +Geospatial data must be stored using the [GeoJSON standard](https://geojson.org/) in a single column. All coordinates are in `[longitude, latitude]` format. + +| Function | Description | +| -------------------------------------------- | ------------------------------------------------------------------ | +| `geoArea(geoJSON)` | Area of features in square meters. | +| `geoLength(geoJSON[, units])` | Length in km (default), or degrees/radians/miles. | +| `geoDistance(point1, point2[, units])` | Distance between two points. | +| `geoNear(point1, point2, distance[, units])` | Returns boolean: true if points are within the specified distance. | +| `geoContains(geo1, geo2)` | Returns boolean: true if geo2 is completely contained by geo1. | +| `geoDifference(polygon1, polygon2)` | Returns a new polygon with polygon2 clipped from polygon1. | +| `geoEqual(geo1, geo2)` | Returns boolean: true if two GeoJSON features are identical. 
| +| `geoCrosses(geo1, geo2)` | Returns boolean: true if the geometries cross each other. | +| `geoConvert(coordinates, geo_type[, props])` | Converts coordinates into a GeoJSON of the specified type. | + +`units` options: `'degrees'`, `'radians'`, `'miles'`, `'kilometers'` (default). + +`geo_type` options for `geoConvert`: `'point'`, `'lineString'`, `'multiLineString'`, `'multiPoint'`, `'multiPolygon'`, `'polygon'`. + +--- + +## Logical Operators + +| Keyword | Description | +| --------- | ------------------------------------------------ | +| `BETWEEN` | Returns values within a given range (inclusive). | +| `IN` | Specifies multiple values in a WHERE clause. | +| `LIKE` | Searches for a pattern. | + +--- + +## Reserved Words + +If a database, table, or attribute name conflicts with a reserved word, wrap it in backticks or brackets: + +```sql +SELECT * FROM data.`ASSERT` +SELECT * FROM data.[ASSERT] +``` + +
+Full reserved word list + +ABSOLUTE, ACTION, ADD, AGGR, ALL, ALTER, AND, ANTI, ANY, APPLY, ARRAY, AS, ASSERT, ASC, ATTACH, AUTOINCREMENT, AUTO_INCREMENT, AVG, BEGIN, BETWEEN, BREAK, BY, CALL, CASE, CAST, CHECK, CLASS, CLOSE, COLLATE, COLUMN, COLUMNS, COMMIT, CONSTRAINT, CONTENT, CONTINUE, CONVERT, CORRESPONDING, COUNT, CREATE, CROSS, CUBE, CURRENT_TIMESTAMP, CURSOR, DATABASE, DECLARE, DEFAULT, DELETE, DELETED, DESC, DETACH, DISTINCT, DOUBLEPRECISION, DROP, ECHO, EDGE, END, ENUM, ELSE, EXCEPT, EXISTS, EXPLAIN, FALSE, FETCH, FIRST, FOREIGN, FROM, GO, GRAPH, GROUP, GROUPING, HAVING, HDB_HASH, HELP, IF, IDENTITY, IS, IN, INDEX, INNER, INSERT, INSERTED, INTERSECT, INTO, JOIN, KEY, LAST, LET, LEFT, LIKE, LIMIT, LOOP, MATCHED, MATRIX, MAX, MERGE, MIN, MINUS, MODIFY, NATURAL, NEXT, NEW, NOCASE, NO, NOT, NULL, OFF, ON, ONLY, OFFSET, OPEN, OPTION, OR, ORDER, OUTER, OVER, PATH, PARTITION, PERCENT, PLAN, PRIMARY, PRINT, PRIOR, QUERY, READ, RECORDSET, REDUCE, REFERENCES, RELATIVE, REPLACE, REMOVE, RENAME, REQUIRE, RESTORE, RETURN, RETURNS, RIGHT, ROLLBACK, ROLLUP, ROW, SCHEMA, SCHEMAS, SEARCH, SELECT, SEMI, SET, SETS, SHOW, SOME, SOURCE, STRATEGY, STORE, SYSTEM, SUM, TABLE, TABLES, TARGET, TEMP, TEMPORARY, TEXTSTRING, THEN, TIMEOUT, TO, TOP, TRAN, TRANSACTION, TRIGGER, TRUE, TRUNCATE, UNION, UNIQUE, UPDATE, USE, USING, VALUE, VERTEX, VIEW, WHEN, WHERE, WHILE, WITH, WORK + +
diff --git a/reference_versioned_docs/version-v4/legacy/custom-functions.md b/reference_versioned_docs/version-v4/legacy/custom-functions.md new file mode 100644 index 00000000..167c53f8 --- /dev/null +++ b/reference_versioned_docs/version-v4/legacy/custom-functions.md @@ -0,0 +1,13 @@ +--- +title: Custom Functions +--- + + + +Custom Functions were Harper's original mechanism for adding custom API endpoints and application logic to a Harper instance. They allowed developers to define Fastify-based HTTP routes that ran inside Harper with direct access to the database, and could be deployed across instances via Studio. + +Custom Functions were superseded by the [Components](../components/overview.md) system introduced in v4.2.0. Components provide the same capabilities with a more robust architecture, better tooling, and support for extensions and plugins. + +All users are encouraged to migrate Custom Functions to Components. See the [Components](../components/overview.md) documentation for the modern approach. + +Reach out to [support@harperdb.io](mailto:support@harperdb.io) or join our community [Discord](https://harper.fast/discord) if you have questions. 
diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 1fb38856..29e72db0 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -372,6 +372,11 @@ "type": "doc", "id": "database/transaction", "label": "Transaction Logging" + }, + { + "type": "doc", + "id": "database/sql", + "label": "SQL" } ] }, @@ -482,6 +487,11 @@ "type": "doc", "id": "legacy/cloud", "label": "Harper Cloud" + }, + { + "type": "doc", + "id": "legacy/custom-functions", + "label": "Custom Functions" } ] } diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 704ec957..9ce62475 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -284,7 +284,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `operations.md` - _Note: Broken out from Security section during migration; RBAC content warranted its own top-level section._ -3. **REST** (`reference_versioned_docs/version-v4/rest/`) +3. **REST** (`reference_versioned_docs/version-v4/rest/`) — **Complete** - `overview.md` - `querying.md` - `headers.md` @@ -309,36 +309,35 @@ Based on migration map and reference plan, recommend this order. Each section is - `global-apis.md` - `query-optimization.md` -6. **Components** (`reference_versioned_docs/version-v4/components/`) +6. **Components** (`reference_versioned_docs/version-v4/components/`) — **Complete** - `overview.md` - `applications.md` - `extension-api.md` - `plugin-api.md` + - `javascript-environment.md` _(added during migration — JS environment details warranted its own page)_ -7. **Replication** (`reference_versioned_docs/version-v4/replication/`) +7. 
**Replication** (`reference_versioned_docs/version-v4/replication/`) — **Complete** - `overview.md` - `clustering.md` - `sharding.md` -**Phase 1D - Cross-Cutting Sections** +**Phase 1D - Cross-Cutting Sections** — **Complete** -1. **Operations API** (`reference_versioned_docs/version-v4/operations-api/`) +1. **Operations API** (`reference_versioned_docs/version-v4/operations-api/`) — **Complete** - `overview.md` - `operations.md` -2. **Configuration** (`reference_versioned_docs/version-v4/configuration/`) +2. **Configuration** (`reference_versioned_docs/version-v4/configuration/`) — **Complete** - `overview.md` - `options.md` - `operations.md` -**Phase 1E - Legacy Content** +**Phase 1E - Legacy Content** — **Complete** -1. **Legacy** (`reference_versioned_docs/version-v4/legacy/`) - - `cloud/` - Harper Cloud landing page to direct users to Fabric instead - - `custom-functions/` (entire folder as-is) - - `sql/` (entire folder as-is) - -(But ensure we reflect version changes from v4.1 to v4.7 using version annotations) +1. **Legacy** (`reference_versioned_docs/version-v4/legacy/`) — **Complete** + - `cloud.md` - Harper Cloud landing page directing users to Fabric + - `custom-functions.md` - What Custom Functions were; directs to Components + - ~~`sql.md`~~ - Moved to `database/sql.md` (SQL is documented content, not just a deprecation notice) --- diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index d0293de8..928ce5ec 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -64,7 +64,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: - Current `reference/configuration.md` - `versioned_docs/version-4.1/configuration.md` (baseline) -- **Status**: Not Started +- **Status**: Complete - **Notes**: Must include a dedicated section on environment variable configuration. 
Content researched and ready from the environment-variables migration: - **Naming convention**: YAML keys map to `SCREAMING_SNAKE_CASE` env vars (e.g. `http.port` → `HTTP_PORT`, `operationsApi.network.port` → `OPERATIONSAPI_NETWORK_PORT`). Case-insensitive. Component configuration cannot be set this way. - **`HDB_CONFIG`**: CLI/ENV variable to specify a custom config file path at install time. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 51-55. @@ -85,7 +85,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Additional Sources**: Compare all version-X/deployments/configuration.md files - **Merge Required**: Yes - configuration options added across versions - **Version Annotations**: Each config option needs version introduced -- **Status**: Not Started +- **Status**: Complete - **Notes**: This will be a large migration task - the current configuration.md is 59KB - **Release Notes**: Major config changes across many versions - see all major releases @@ -94,7 +94,7 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/configuration.md` - **Additional Sources**: Earlier versions for feature evolution - **Version Annotations**: Track when ops were added -- **Status**: Not Started +- **Status**: Complete --- @@ -105,14 +105,14 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/index.md` - **Additional Sources**: - `versioned_docs/version-4.2/developers/operations-api/index.md` (first structured ops api section) -- **Status**: Not Started +- **Status**: Complete ### `reference/operations-api/operations.md` - **Primary Source**: Synthesize from all `versioned_docs/version-4.7/developers/operations-api/*.md` files - **Merge Required**: Yes - comprehensive list linking to primary references - 
**Version Annotations**: Each operation needs version introduced -- **Status**: Not Started +- **Status**: Complete - **Notes**: This should be a simplified reference table/list with links to detailed docs in feature sections --- @@ -226,7 +226,7 @@ Broken out from the security section during migration — RBAC warrants its own - Components concept: v4.2.0 - Applications/Extensions: v4.3.0+ - Plugin API: v4.6.0 -- **Status**: In Progress +- **Status**: Complete - **Notes**: This is a critical page that explains the evolution - **Release Notes**: - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Custom functions with worker threads @@ -240,7 +240,7 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.7/developers/applications/*.md` - Current `reference/components/applications.md` - **Merge Required**: Yes - application developer docs scattered across multiple files -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture, NPM/GitHub deployment @@ -249,7 +249,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` - **Additional Sources**: Current `reference/components/extensions.md` - **Version Annotations**: Extension API formalized around v4.4-4.5 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API @@ -258,11 +258,16 @@ Broken out from the security section during migration — RBAC warrants its own - **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` - **Additional Sources**: Current `reference/components/plugins.md` - **Version Annotations**: Added in v4.6.0 -- **Status**: In Progress +- **Status**: Complete - **Release Notes**: - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Plugin API introduced - 
[4.7.0](release-notes/v4-tucker/4.7.0.md) - Further plugin API improvements +### `reference/components/javascript-environment.md` + +- **Status**: Complete +- **Notes**: Added during migration — JavaScript environment details for component development warranted its own page. Not in the original plan. + --- ## Database Section @@ -376,6 +381,15 @@ Broken out from the security section during migration — RBAC warrants its own - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Balanced audit log cleanup - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Transaction reuse, storage reclamation (audit log eviction) +### `reference/database/sql.md` + +- **Primary Source**: `versioned_docs/version-4.7/reference/sql-guide/` (all files) +- **Additional Sources**: + - `versioned_docs/version-4.7/developers/operations-api/sql-operations.md` +- **Merge Required**: Yes — consolidates all sql-guide sub-pages into one +- **Status**: Complete +- **Notes**: Moved here from Legacy section. Includes a prominent warning that SQL is not recommended for production use or large tables. Covers DML syntax, features matrix, all function categories (aggregate, string, math, logical, date/time), SEARCH_JSON, geospatial functions, and reserved words. 
+ --- ## Resources Section @@ -647,7 +661,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Additional Sources**: Current `reference/replication/` (if exists) - **Version Annotations**: - Native Replication (Plexus): v4.4.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native replication system (Plexus), replicated operations - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information, improved replication timestamps @@ -660,7 +674,7 @@ Broken out from the security section during migration — RBAC warrants its own - `versioned_docs/version-4.7/developers/operations-api/clustering.md` - Current `reference/clustering/` folder - **Merge Required**: Yes - extensive clustering documentation needs consolidation -- **Status**: Not Started +- **Status**: Complete - **Notes**: Large section with many sub-pages - **Release Notes**: - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Clone node functionality @@ -673,7 +687,7 @@ Broken out from the security section during migration — RBAC warrants its own - **Version Annotations**: - Sharding: v4.4.0 - Expanded functionality: v4.5.0 -- **Status**: Not Started +- **Status**: Complete - **Release Notes**: - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Sharding introduced - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Expanded sharding functionality @@ -735,21 +749,17 @@ Broken out from the security section during migration — RBAC warrants its own - **Status**: Complete - **Notes**: The primary and additional sources are to be completely removed and this section is to act as a basic landing page to direct users to Fabric instead. 
-### `reference/legacy/custom-functions/` +### `reference/legacy/custom-functions.md` -- **Primary Source**: `versioned_docs/version-4.1/custom-functions/*` +- **Primary Source**: `versioned_docs/version-4.1/custom-functions/index.md` - **Additional Sources**: `versioned_docs/version-4.7/developers/operations-api/custom-functions.md` -- **Status**: N/A -- **Notes**: Move as-is with deprecation notice pointing to Components +- **Status**: Complete +- **Notes**: Single page (not a folder) — describes what Custom Functions were and directs users to the Components section as the modern alternative. -### `reference/legacy/sql/` +### ~~`reference/legacy/sql.md`~~ -- **Primary Source**: `versioned_docs/version-4.7/reference/sql-guide/*` -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/operations-api/sql-operations.md` - - Current `reference/sql-guide/` - **Status**: N/A -- **Notes**: Move entire section as-is with deprecation notice +- **Notes**: Moved to `reference/database/sql.md` — SQL is documented content (not just a deprecation notice), so it belongs in the Database section. See entry there. 
--- From 4f3fa639619e2e11c32b6c3bbe3efef9615d44ff Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 08:51:05 -0600 Subject: [PATCH 32/51] clean up progress before starting on other fixes --- .../version-v4/resources/resource-api.md | 2 +- .../version-v4/rest/content-types.md | 2 +- v4-docs-implementation-plan.md | 3 ++- v4-docs-migration-map.md | 20 ++++++++----------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/reference_versioned_docs/version-v4/resources/resource-api.md b/reference_versioned_docs/version-v4/resources/resource-api.md index 47fdca7b..8c047d48 100644 --- a/reference_versioned_docs/version-v4/resources/resource-api.md +++ b/reference_versioned_docs/version-v4/resources/resource-api.md @@ -544,7 +544,7 @@ export class BlogPost extends tables.BlogPost { } ``` -See [Global APIs — transaction](./global-apis.md#transaction) for explicitly starting transactions outside of request handlers. +See [JavaScript Environment — transaction](../components/javascript-environment.md#transactionfn) for explicitly starting transactions outside of request handlers. --- diff --git a/reference_versioned_docs/version-v4/rest/content-types.md b/reference_versioned_docs/version-v4/rest/content-types.md index bcd2217f..23638472 100644 --- a/reference_versioned_docs/version-v4/rest/content-types.md +++ b/reference_versioned_docs/version-v4/rest/content-types.md @@ -61,7 +61,7 @@ Using the `Accept` header is the recommended approach for clean, standard HTTP i ## Custom Content Types -Harper's content type system is extensible. Custom handlers for any serialization format (XML, YAML, proprietary formats, etc.) can be registered in the [`contentTypes`](../resources/global-apis.md) global Map. +Harper's content type system is extensible. Custom handlers for any serialization format (XML, YAML, proprietary formats, etc.) can be registered in the [`contentTypes`](../components/javascript-environment.md#contenttypes) global Map. 
## Storing Arbitrary Content Types diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md index 9ce62475..b52e038f 100644 --- a/v4-docs-implementation-plan.md +++ b/v4-docs-implementation-plan.md @@ -277,6 +277,7 @@ Based on migration map and reference plan, recommend this order. Each section is - `certificate-management.md` - `certificate-verification.md` - `configuration.md` _(consolidated from planned `cors.md` + `ssl.md`)_ + - `api.md` _(added during migration — not in original plan)_ 2. **Users and Roles** (`reference_versioned_docs/version-v4/users-and-roles/`) — **Complete** - `overview.md` @@ -306,7 +307,7 @@ Based on migration map and reference plan, recommend this order. Each section is 5. **Resources** (`reference_versioned_docs/version-v4/resources/`) — **Complete** - `overview.md` - `resource-api.md` - - `global-apis.md` + - ~~`global-apis.md`~~ _(not created — content covered by `components/javascript-environment.md`)_ - `query-optimization.md` 6. **Components** (`reference_versioned_docs/version-v4/components/`) — **Complete** diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md index 928ce5ec..3f89e44d 100644 --- a/v4-docs-migration-map.md +++ b/v4-docs-migration-map.md @@ -176,6 +176,11 @@ This document maps existing documentation paths from `versioned_docs/version-4.X - **Status**: Complete - **Notes**: Covers authentication configuration (authorizeLocal, cacheTTL, enableSessions, token timeouts, hashFunction), CORS, and SSL/TLS settings. Originally planned as separate `cors.md` and `ssl.md` pages; consolidated into a single `configuration.md` during migration. +### `reference/security/api.md` + +- **Status**: Complete +- **Notes**: Added during migration — not in the original plan. Security-related API reference. 
+ --- ## Users and Roles Section @@ -423,19 +428,10 @@ Broken out from the security section during migration — RBAC warrants its own - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Property forwarding - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Resource API upgrades -### `reference/resources/global-apis.md` +### ~~`reference/resources/global-apis.md`~~ -- **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/transactions.md` - - Current `reference/globals.md` - - Current `reference/transactions.md` -- **Merge Required**: Yes - consolidate global APIs (tables, databases, transactions, etc.) -- **Version Annotations**: Various APIs added across versions -- **Status**: Complete -- **Notes**: Should reference out to http/api.md for `server` global -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Table.getRecordCount() +- **Status**: N/A +- **Notes**: Content superseded by `reference/components/javascript-environment.md`, which covers all global APIs (`tables`, `databases`, `transaction`, `createBlob`, `Resource`, `server`, `contentTypes`, `logger`) and references out to the appropriate sections for full detail. The two formerly broken links in `resources/resource-api.md:547` and `rest/content-types.md:64` have been updated in this change to point to `../components/javascript-environment.md`. 
### `reference/resources/query-optimization.md` From dd8fc4feddf047dcaceadacc0a8043c54cca62ae Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 15:17:18 -0600 Subject: [PATCH 33/51] Link resolution (#467) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs(link-resolution): resolve link placeholders in CLI section - cli/overview.md: resolved Operations API and Configuration links - cli/commands.md: resolved Configuration link - cli/operations-api-commands.md: resolved all TODO links; mapped operations table category links to operations-api/operations.md with anchors (no sub-pages exist); resolved Applications → components/overview.md Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Fastify Routes, Studio, GraphQL Querying - fastify-routes/overview.md: resolved Resources and REST overview links - studio/overview.md: resolved configuration options and Operations API links - graphql-querying/overview.md: resolved schema definition, Resources, and Resource Query API links Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Env Vars, Static Files, HTTP, MQTT - environment-variables/overview.md: resolved Configuration and Components links - static-files/overview.md: resolved all Components overview links - http/overview.md: resolved REST Overview link - http/configuration.md: resolved Configuration Overview link - http/tls.md: resolved Operations API Configuration links (x2) - http/api.md: resolved Operations API and REST Overview links - mqtt/overview.md: resolved REST Overview link - mqtt/configuration.md: resolved Configuration Overview link Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Logging and Analytics - logging/configuration.md: resolved Configuration Overview link - logging/operations.md: resolved Operations API Overview link - analytics/overview.md: resolved Configuration Overview 
links (x2) - analytics/operations.md: resolved search_by_conditions and Operations API Overview links Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Security - security/overview.md: resolved Configuration Operations links (CORS network, TLS) - security/mtls-authentication.md: resolved Replication clustering link - security/certificate-verification.md: resolved Replication clustering link - security/certificate-management.md: resolved Replication clustering link Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in REST, Components, Resources, Replication - rest/websockets.md: resolved Resource API and Resources overview links - rest/server-sent-events.md: resolved Resources overview link - components/overview.md: fixed malformed TODO-as-link-text for Resource API and Schema links - components/extension-api.md: resolved server global API link - components/plugin-api.md: resolved Resource API and server global API links - resources/overview.md: resolved REST Overview and MQTT Overview links - resources/query-optimization.md: resolved Schema documentation link - replication/overview.md: resolved Certificate Verification and Certificate Management links Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Database section - database/overview.md: resolved Resource API, Resources, Operations API, Configuration links - database/schema.md: resolved Operations API, Resources, Configuration links - database/api.md: resolved Resource API (x3) and Configuration links - database/data-loader.md: resolved Extension API and Components links - database/storage-algorithm.md: resolved Configuration links (x2) - database/jobs.md: resolved Operations API links (x2) - database/system-tables.md: resolved Replication clustering, Replication overview, Operations API links - database/compaction.md: resolved Configuration link - database/transaction.md: resolved Replication and 
Operations API links Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): resolve link placeholders in Operations API and Configuration - operations-api/overview.md: resolved Configuration Overview link - operations-api/operations.md: resolved SQL (→ database/sql.md) and Configuration links - configuration/operations.md: resolved restart and restart_service operation links Co-Authored-By: Claude Sonnet 4.6 * docs(link-resolution): delete resolved link placeholder tracker files All link placeholders have been resolved. Removing tracker files now that link resolution is complete. Co-Authored-By: Claude Sonnet 4.6 * chore: update MEMORY.md — Part 3 link resolution complete Co-Authored-By: Claude Sonnet 4.6 * fix(reference): resolve broken anchors in reference/v4 docs - security/overview: remove non-existent #network and #tls anchors from configuration/operations links - replication/overview: remove non-existent #operations-api anchor from clustering link - http/overview: fix server.http() anchor (#serverhttp → #serverhttplistener-options) - logging/api: fix logging.external anchor (#logging-external → #loggingexternal) - configuration/operations: fix restart_service anchor (#restart-service → #restart_service) - graphql-querying/overview: fix Resource Query API link (resources/overview#query → rest/querying) - components/applications: fix add_ssh_key anchor (#add-ssh-key → #add_ssh_key) - components/plugin-api: add explicit {#scopehandleentry} id to handleEntry heading - resources/overview: fix empty database/schema link Co-Authored-By: Claude Sonnet 4.6 * update memory docs * format --------- Co-authored-by: Claude Sonnet 4.6 --- memory/MEMORY.md | 77 +++++ memory/part4-plan.md | 143 +++++++++ .../analytics-link-placeholders.md | 26 -- .../cli-link-placeholders.md | 282 ------------------ .../components-link-placeholders.md | 65 ---- .../configuration-link-placeholders.md | 61 ---- .../database-link-placeholders.md | 135 --------- 
...environment-variables-link-placeholders.md | 19 -- .../fastify-routes-link-placeholders.md | 11 - .../graphql-querying-link-placeholders.md | 14 - .../http-link-placeholders.md | 48 --- .../logging-link-placeholders.md | 34 --- .../mqtt-link-placeholders.md | 32 -- .../operations-api-link-placeholders.md | 131 -------- .../replication-link-placeholders.md | 11 - .../resources-link-placeholders.md | 21 -- .../rest-link-placeholders.md | 39 --- .../security-link-placeholders.md | 31 -- .../static-files-link-placeholders.md | 15 - .../studio-link-placeholders.md | 11 - .../version-v4/analytics/operations.md | 18 +- .../version-v4/analytics/overview.md | 4 +- .../version-v4/cli/commands.md | 4 +- .../version-v4/cli/operations-api-commands.md | 170 +++++------ .../version-v4/cli/overview.md | 6 +- .../version-v4/components/applications.md | 2 +- .../version-v4/components/extension-api.md | 2 +- .../version-v4/components/overview.md | 4 +- .../version-v4/components/plugin-api.md | 6 +- .../version-v4/configuration/operations.md | 2 +- .../version-v4/database/api.md | 8 +- .../version-v4/database/compaction.md | 2 +- .../version-v4/database/data-loader.md | 4 +- .../version-v4/database/jobs.md | 4 +- .../version-v4/database/overview.md | 8 +- .../version-v4/database/schema.md | 6 +- .../version-v4/database/storage-algorithm.md | 4 +- .../version-v4/database/system-tables.md | 6 +- .../version-v4/database/transaction.md | 4 +- .../environment-variables/overview.md | 6 +- .../version-v4/fastify-routes/overview.md | 4 +- .../version-v4/graphql-querying/overview.md | 4 +- .../version-v4/http/api.md | 4 +- .../version-v4/http/configuration.md | 2 +- .../version-v4/http/overview.md | 4 +- .../version-v4/http/tls.md | 4 +- .../version-v4/logging/api.md | 2 +- .../version-v4/logging/configuration.md | 2 +- .../version-v4/logging/operations.md | 2 +- .../version-v4/mqtt/configuration.md | 2 +- .../version-v4/mqtt/overview.md | 2 +- .../version-v4/operations-api/operations.md | 
4 +- .../version-v4/operations-api/overview.md | 2 +- .../version-v4/replication/overview.md | 6 +- .../version-v4/resources/overview.md | 6 +- .../resources/query-optimization.md | 2 +- .../version-v4/rest/server-sent-events.md | 2 +- .../version-v4/rest/websockets.md | 4 +- .../security/certificate-management.md | 2 +- .../security/certificate-verification.md | 2 +- .../security/mtls-authentication.md | 2 +- .../version-v4/security/overview.md | 4 +- .../version-v4/static-files/overview.md | 6 +- .../version-v4/studio/overview.md | 4 +- 64 files changed, 394 insertions(+), 1160 deletions(-) create mode 100644 memory/MEMORY.md create mode 100644 memory/part4-plan.md delete mode 100644 migration-context/link-placeholders/analytics-link-placeholders.md delete mode 100644 migration-context/link-placeholders/cli-link-placeholders.md delete mode 100644 migration-context/link-placeholders/components-link-placeholders.md delete mode 100644 migration-context/link-placeholders/configuration-link-placeholders.md delete mode 100644 migration-context/link-placeholders/database-link-placeholders.md delete mode 100644 migration-context/link-placeholders/environment-variables-link-placeholders.md delete mode 100644 migration-context/link-placeholders/fastify-routes-link-placeholders.md delete mode 100644 migration-context/link-placeholders/graphql-querying-link-placeholders.md delete mode 100644 migration-context/link-placeholders/http-link-placeholders.md delete mode 100644 migration-context/link-placeholders/logging-link-placeholders.md delete mode 100644 migration-context/link-placeholders/mqtt-link-placeholders.md delete mode 100644 migration-context/link-placeholders/operations-api-link-placeholders.md delete mode 100644 migration-context/link-placeholders/replication-link-placeholders.md delete mode 100644 migration-context/link-placeholders/resources-link-placeholders.md delete mode 100644 migration-context/link-placeholders/rest-link-placeholders.md delete mode 100644 
migration-context/link-placeholders/security-link-placeholders.md delete mode 100644 migration-context/link-placeholders/static-files-link-placeholders.md delete mode 100644 migration-context/link-placeholders/studio-link-placeholders.md diff --git a/memory/MEMORY.md b/memory/MEMORY.md new file mode 100644 index 00000000..e4dd2858 --- /dev/null +++ b/memory/MEMORY.md @@ -0,0 +1,77 @@ +# Documentation Migration Memory + +## Project Overview + +Harper v4 docs migration: consolidating `versioned_docs/version-4.X/` → `reference_versioned_docs/version-v4/` with feature-based reorganization. + +- **Working branch**: `major-version-reorg` (all migration PRs target this branch) +- **Target dir**: `reference_versioned_docs/version-v4/` +- **Do NOT touch**: `versioned_docs/` or `reference/` + +## Key Files + +- `v4-docs-implementation-plan.md` — Agent instructions (follow Part 1 closely) +- `v4-docs-migration-map.md` — Authoritative source-to-target mapping per section +- `v4-docs-reference-plan.md` — Structure philosophy and outline +- `reference_versioned_sidebars/version-v4-sidebars.json` — Sidebar to update for each section +- ~~`migration-context/link-placeholders/`~~ — **Deleted** (Part 3 link resolution complete) + +## Release Notes Location + +`release-notes/v4-tucker/4.X.0.md` (NOT `release_notes/`) + +## Completed Sections + +All Phase 1A–1D sections are complete and merged: + +- CLI, GraphQL Querying, Studio, Fastify Routes (Phase 1A) +- Environment Variables, Static Files, HTTP, MQTT, Logging, Analytics (Phase 1B) +- Security, Users & Roles, REST (PR #457), Database (PR #458), Resources (PR #459), Components (PR #460), Replication (PR #461) (Phase 1C) +- Operations API (PR #462), Configuration (PR #463) (Phase 1D) + +## Key Decisions / Learnings + +- Each section gets its own branch `migration/[section-name]` off `major-version-reorg` (for phase 1 content generation) +- PRs are draft by default, opened against `major-version-reorg` +- `@relationship` in v4.7 
source (not `@relation` from 4.3 release notes) — needs human verification +- Audit log required for real-time messaging (MQTT/WebSocket) — verify still true +- `schema.md` kept unified (overview + blobs + vectors); consider splitting if too long +- System tables include: `hdb_raw_analytics`, `hdb_analytics`, `hdb_dataloader_hash`, `hdb_nodes`, `hdb_certificate` +- Analytics detail lives in `analytics/overview.md`, not `database/system-tables.md` +- Components section added `javascript-environment.md` (not in original plan) + +## Next Steps + +**Part 3 (Link Resolution) — Complete** on `link-resolution` branch (10 commits). Merge to `major-version-reorg` via PR review, then continue: + +**Part 4 (Cross-Reference Updates)** — Full plan in [`memory/part4-plan.md`](part4-plan.md). + +- Branch: `cross-reference-updates` off `major-version-reorg` +- Scope: ~7 release note files + 1 learn guide with old `/docs/` links +- **First step**: verify URL prefix for new reference pages (check `docusaurus.config.ts`) + +**Part 5 (Redirects)** — Configure redirects from old paths (`/docs/reference/`, `/docs/developers/`, etc.) to new paths in `docusaurus.config.ts`. + +### Part 3 Key Decisions + +- Operations table category links (e.g. `../operations-api/database.md`) → `../operations-api/operations.md` with section anchors (no sub-pages exist) +- `resources/global-apis.md` never created → links redirected to `../components/javascript-environment.md` +- SQL operations link → `../database/sql.md` (SQL moved from legacy per migration map) +- `[Applications](TODO:applications/overview.md)` → `../components/overview.md` +- Malformed `[TODO:path](TODO:path)` links in `components/overview.md` fixed with proper text + +Legacy section: single files only (no subfolders): `cloud.md`, `custom-functions.md`. SQL moved to `database/sql.md`. 
+ +## Sidebar Pattern + +```json +{ + "type": "category", + "label": "Section Name", + "collapsible": false, + "className": "learn-category-header", + "items": [{ "type": "doc", "id": "section/page", "label": "Label" }] +} +``` + +Insert new sections before the Legacy category at the bottom of the sidebar. diff --git a/memory/part4-plan.md b/memory/part4-plan.md new file mode 100644 index 00000000..1d243338 --- /dev/null +++ b/memory/part4-plan.md @@ -0,0 +1,143 @@ +# Part 4: Cross-Reference Updates — Plan & Procedure + +## Overview + +Update links in `release-notes/` and `learn/` that point to old doc paths, mapping them to the new `reference_versioned_docs/version-v4/` structure. + +**Branch**: Create a new branch `cross-reference-updates` off `major-version-reorg` (after `link-resolution` is merged). + +**Commit strategy**: One commit per file group (release notes in one commit, learn guides in another, or broken down further if large). + +--- + +## Scope of Changes + +### Release Notes (`release-notes/v4-tucker/`) + +171 files total. Only ~7 files have `/docs/` links that need updating. 
The full list of unique links found (grep: `(/docs/[^)"\ ]*)` across all `release-notes/v4-tucker/*.md`): + +| Old Path | New Path | Notes | +| ------------------------------------------------------------------------------------- | ------------------------------------------------------------ | --------------------------------------------------- | +| `/docs/deployments/configuration` | `/docs/v4/configuration/overview` | 7 occurrences | +| `/docs/reference/resources` | `/docs/v4/resources/overview` | 4 occurrences | +| `/docs/developers/applications/defining-schemas` | `/docs/v4/database/schema` | 4 occurrences | +| `/docs/reference/graphql` | `/docs/v4/graphql-querying/overview` | 1 occurrence | +| `/docs/reference/components/extensions` | `/docs/v4/components/extension-api` | 1 occurrence | +| `/docs/reference/components/applications?_highlight=github#adding-components-to-root` | `/docs/v4/components/applications#adding-components-to-root` | 1 occurrence | +| `/docs/reference/blob` | `/docs/v4/database/schema#blob-storage` | 1 occurrence | +| `/docs/developers/rest` | `/docs/v4/rest/overview` | 1 occurrence | +| `/docs/developers/replication/sharding` | `/docs/v4/replication/sharding` | 1 occurrence | +| `/docs/developers/replication/` | `/docs/v4/replication/overview` | 1 occurrence | +| `/docs/developers/real-time` | `/docs/v4/rest/websockets` | 1 occurrence (real-time = websockets+SSE+MQTT) | +| `/docs/developers/operations-api/clustering` | `/docs/v4/replication/clustering` | 1 occurrence | +| `/docs/developers/applications/data-loader` | `/docs/v4/database/data-loader` | 1 occurrence | +| `/docs/deployments/harper-cli` | `/docs/v4/cli/overview` | 1 occurrence | +| `/docs/administration/logging/` | `/docs/v4/logging/overview` | 1 occurrence | +| `/docs/administration/cloning` | N/A — learn guide (not in reference) | Leave or link to learn guide if exists | +| `/docs/4.1/custom-functions/host-static` | `/docs/v4/legacy/custom-functions` | Legacy redirect | +| 
`/docs/4.1/configuration#storage` | `/docs/v4/configuration/options#storage` | 1 occurrence | +| `/docs/4.1/configuration#session-affinity` | `/docs/v4/configuration/options#http` | 1 occurrence (http section covers session affinity) | +| `/docs/4.1/configuration#schemas` | `/docs/v4/database/schema` | 1 occurrence | + +> **NOTE**: The exact URL prefix for the new structure (`/docs/v4/`) needs to be verified. Check `docusaurus.config.ts` or `reference_versioned_sidebars/version-v4-sidebars.json` for the versioned path prefix. It may be `/docs/v4/` or `/reference/v4/` or similar. + +**Files that contain links (to edit):** + +- `release-notes/v4-tucker/4.1.0.md` — `/docs/4.1/configuration#*` and `/docs/4.1/custom-functions/*` +- `release-notes/v4-tucker/4.2.0.md` — `/docs/reference/resources`, `/docs/reference/components/*` +- `release-notes/v4-tucker/4.3.0.md` — `/docs/reference/resources` +- `release-notes/v4-tucker/4.4.0.md` — `/docs/developers/applications/defining-schemas`, `/docs/reference/resources`, `/docs/reference/graphql` +- `release-notes/v4-tucker/4.5.0.md` — `/docs/reference/blob`, `/docs/deployments/configuration` + +**To find all affected files precisely**: `grep -rl "/docs/" release-notes/v4-tucker/` + +--- + +### Learn Guides (`learn/`) + +Only 4 content files currently exist (most are stubs): + +- `learn/developers/harper-applications-in-depth.mdx` +- `learn/getting-started/create-your-first-application.mdx` +- `learn/getting-started/install-and-connect-harper.mdx` +- `learn/index.mdx` + +Links found in `harper-applications-in-depth.mdx`: + +| Old Path | New Path | +| ------------------------------------------------ | ------------------------------------------------------------ | +| `/docs/reference/components/built-in-extensions` | `/docs/v4/components/overview#built-in-extensions-reference` | +| `/docs/reference/resources` | `/docs/v4/resources/overview` | +| `/docs/reference/globals#logger` | `/docs/v4/logging/api` | +| 
`/docs/reference/resources/` | `/docs/v4/resources/overview` | +| `/docs/reference/components/` | `/docs/v4/components/overview` | + +--- + +## Procedure + +### Step 1: Verify URL prefix + +Before editing any links, confirm what the new URL prefix is for `reference_versioned_docs/version-v4/`. Check: + +```bash +cat docusaurus.config.ts | grep -A5 "reference_versioned" +# or +cat reference_versioned_sidebars/version-v4-sidebars.json | head -5 +``` + +The prefix is likely `/docs/v4/` but confirm before proceeding. + +### Step 2: Find all affected release note files + +```bash +grep -rl "/docs/" release-notes/v4-tucker/ +``` + +This gives the exact list of files to edit. + +### Step 3: Edit release notes + +For each affected file, replace old `/docs/` paths with new `/docs/v4/` paths per the mapping table above. + +### Step 4: Edit learn guides + +Read each of the 4 learn guide files, apply the mapping table above. + +### Step 5: Check for any remaining old-path links across the whole repo + +```bash +grep -rn "/docs/reference/" --include="*.md" --include="*.mdx" release-notes/ learn/ +grep -rn "/docs/developers/" --include="*.md" --include="*.mdx" release-notes/ learn/ +grep -rn "/docs/deployments/" --include="*.md" --include="*.mdx" release-notes/ learn/ +grep -rn "/docs/administration/" --include="*.md" --include="*.mdx" release-notes/ learn/ +grep -rn "/docs/4\." --include="*.md" --include="*.mdx" release-notes/ learn/ +``` + +### Step 6: Commit + +- Commit release notes changes: `docs(cross-refs): update old /docs/ links in release notes to v4 reference paths` +- Commit learn guide changes: `docs(cross-refs): update old /docs/ links in learn guides to v4 reference paths` + +--- + +## Key Uncertainties to Resolve + +1. **URL prefix** — Confirm whether new reference pages are served at `/docs/v4/`, `/reference/v4/`, or another prefix. **Critical before editing any links.** +2. 
**`/docs/administration/cloning`** — This was flagged in migration map as "move to Learn guide." If no learn guide exists yet, either leave as-is (broken link) or remove the link text. +3. **`/docs/developers/real-time`** — This page covered WebSockets, SSE, and MQTT. Best split into: WebSockets content → `rest/websockets`, MQTT content → `mqtt/overview`. In context of release notes, pick whichever is most relevant to the surrounding text. + +--- + +## Non-Goals for Part 4 + +- Do NOT edit `versioned_docs/` files +- Do NOT edit `reference_versioned_docs/` files (those were handled in Part 3) +- Do NOT update links in the v1/v2/v3 release notes (out of scope) +- Do NOT update links in other config files (docusaurus.config.ts, sidebars, etc.) — that's Part 5 + +--- + +## After Part 4 + +Proceed to **Part 5: Redirects** — configure redirects from old `/docs/developers/`, `/docs/reference/`, etc. paths to the new `/docs/v4/` equivalents in `docusaurus.config.ts` (or wherever redirects are configured). 
diff --git a/migration-context/link-placeholders/analytics-link-placeholders.md b/migration-context/link-placeholders/analytics-link-placeholders.md deleted file mode 100644 index aaaeab94..00000000 --- a/migration-context/link-placeholders/analytics-link-placeholders.md +++ /dev/null @@ -1,26 +0,0 @@ -# Link Placeholders for Analytics Section - -## reference_versioned_docs/version-v4/analytics/overview.md - -- ~~Line 99: `[server.recordAnalytics()](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md#serverrecordanalyticsvalue-metric-path-method-type` -- ~~Line 103: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md` -- ~~Line 105: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)`~~ **RESOLVED** → `../logging/configuration.md` -- ~~Line 110: `[HTTP API](TODO:reference_versioned_docs/version-v4/http/api.md)`~~ **RESOLVED** → `../http/api.md` -- ~~Line 111: `[Logging Configuration](TODO:reference_versioned_docs/version-v4/logging/configuration.md)`~~ **RESOLVED** → `../logging/configuration.md` - -- Line 106 + 112: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: analytics.aggregatePeriod configuration + related section - - Target should be: Configuration section overview page - - **Status**: PENDING (Configuration section migration) - -## reference_versioned_docs/version-v4/analytics/operations.md - -- Line 56: `[search_by_conditions](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` - - Context: Note that `conditions` parameter uses the same format as search_by_conditions - - Target should be: Operations API operations page - - **Status**: PENDING (Operations API section migration) - -- Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Related section at bottom of file - - Target should be: 
Operations API section overview page - - **Status**: PENDING (Operations API section migration) diff --git a/migration-context/link-placeholders/cli-link-placeholders.md b/migration-context/link-placeholders/cli-link-placeholders.md deleted file mode 100644 index fb3f79d3..00000000 --- a/migration-context/link-placeholders/cli-link-placeholders.md +++ /dev/null @@ -1,282 +0,0 @@ -# Link Placeholders for CLI Section - -This document tracks all link placeholders in the CLI section that need to be resolved once other sections are migrated. - -## reference_versioned_docs/version-v4/cli/overview.md - -- Line 30: `[CLI Authentication](TODO:reference_versioned_docs/version-v4/cli/authentication.md "CLI authentication details")` - - Context: Linking to CLI authentication details - - Target: CLI authentication page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 45: `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md "Detailed CLI command reference")` - - Context: Linking to detailed CLI commands - - Target: CLI commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 60: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` - - Context: Linking to Operations API overview - - Target: Operations API section overview page - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 100: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API CLI commands reference")` - - Context: Linking to operations API commands via CLI - - Target: Operations API commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 126: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview")` - - Context: Linking to configuration options - - Target: 
Configuration section overview - - **Status**: PENDING (will be created in Configuration section migration) - -- Line 144: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API")` - - Context: Linking to full operations API reference - - Target: Operations API section overview page - - **Status**: PENDING (will be created in Operations API section migration) - -## reference_versioned_docs/version-v4/cli/commands.md - -- Line 9: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API CLI commands")` - - Context: Referring users to operations API commands documentation - - Target: Operations API commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 52: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration overview")` - - Context: Linking to configuration parameters - - Target: Configuration section overview - - **Status**: PENDING (will be created in Configuration section migration) - -- Line 125: `[Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Database compaction reference")` - - Context: Linking to database compaction details - - Target: Database section compaction page - - **Status**: PENDING (will be created in Database section migration) - -- Line 146: `[CLI Overview - Remote Operations](TODO:reference_versioned_docs/version-v4/cli/overview.md#remote-operations "Remote operations documentation")` - - Context: Linking to remote operations section in overview - - Target: CLI overview page, remote operations section - - **Status**: RESOLVED (file created in this migration) - -- Line 151: `[CLI Overview](TODO:reference_versioned_docs/version-v4/cli/overview.md "CLI overview")` - - Context: Linking back to CLI overview - - Target: CLI overview page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- 
Line 152: `[Operations API Commands](TODO:reference_versioned_docs/version-v4/cli/operations-api-commands.md "Operations API commands")` - - Context: Linking to operations API commands - - Target: Operations API commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 153: `[CLI Authentication](TODO:reference_versioned_docs/version-v4/cli/authentication.md "CLI authentication")` - - Context: Linking to authentication mechanisms - - Target: CLI authentication page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 154: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration")` - - Context: Linking to configuration parameters - - Target: Configuration section overview - - **Status**: PENDING (will be created in Configuration section migration) - -- Line 155: `[Database Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md "Compaction")` - - Context: Linking to compaction details - - Target: Database section compaction page - - **Status**: PENDING (will be created in Database section migration) - -## reference_versioned_docs/version-v4/cli/operations-api-commands.md - -- Line 12: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` - - Context: Introduction paragraph - - Target: Operations API section overview page - - **Status**: PENDING (will be created in Operations API section migration) - -- Lines 38-109: 72 operations table entries with category links - - `[Database](TODO:../operations-api/database.md)` (9 operations) - - `[Data](TODO:../operations-api/data.md)` (9 operations) - - `[Security](TODO:../operations-api/security.md)` (17 operations) - - `[Clustering](TODO:../operations-api/clustering.md)` (4 operations) - - `[Components](TODO:../operations-api/components.md)` (9 operations) - - `[Configuration](TODO:../operations-api/configuration.md)` (2 
operations) - - `[Authentication](TODO:../operations-api/authentication.md)` (2 operations) - - `[System](TODO:../operations-api/system.md)` (3 operations) - - `[Licensing](TODO:../operations-api/licensing.md)` (4 operations) - - `[Jobs](TODO:../operations-api/jobs.md)` (2 operations) - - `[Logging](TODO:../operations-api/logging.md)` (4 operations) - - `[Maintenance](TODO:../operations-api/maintenance.md)` (2 operations) - - `[Status](TODO:../operations-api/status.md)` (3 operations) - - Context: Operations table linking to operation category documentation - - Target: Operations API section category pages - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 118: `[Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Complete operations list")` - - Context: After Command Aliases section - - Target: Operations API section operations page - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 150: `[Database Reference](TODO:reference_versioned_docs/version-v4/database/overview.md "Database reference documentation")` - - Context: Tip callout in Database Operations examples - - Target: Database section overview - - **Status**: PENDING (will be created in Database section migration) - -- Line 168: `[REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST API reference")` - - Context: Tip callout in Data Operations examples - - Target: REST section overview - - **Status**: PENDING (will be created in REST section migration) - -- Line 168: `[GraphQL Querying](TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md "GraphQL querying reference")` - - Context: Tip callout in Data Operations examples - - Target: GraphQL Querying section overview - - **Status**: PENDING (will be created in GraphQL Querying section migration) - -- Line 186: `[Configuration 
Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md "Configuration reference")` - - Context: Tip callout in Configuration Operations examples - - Target: Configuration section overview - - **Status**: PENDING (will be created in Configuration section migration) - -- Line 204: `[Components Reference](TODO:reference_versioned_docs/version-v4/components/overview.md "Components reference")` - - Context: Tip callout in Component Operations examples - - Target: Components section overview - - **Status**: PENDING (will be created in Components section migration) - -- Line 222: `[Security Reference](TODO:reference_versioned_docs/version-v4/security/overview.md "Security reference")` - - Context: Tip callout in User and Role Operations examples - - Target: Security section overview - - **Status**: PENDING (will be created in Security section migration) - -- Line 227: `[CLI Overview - Remote Operations](./overview.md#remote-operations)` - - Context: Remote Operations section - - Target: CLI overview page, remote operations anchor - - **Status**: RESOLVED (file created in this migration) - -- Line 245: `[Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md "Applications reference")` - - Context: Remote Component Deployment section - - Target: Applications section (or Components section) - - **Status**: PENDING (will be created in future section migration) - -- Line 246: `[Deploying Harper Applications](TODO:learn_link "Deploying applications guide")` - - Context: Remote Component Deployment section - - Target: Learn guide on deploying applications - - **Status**: PENDING (external learn link) - -- Line 372: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API")` - - Context: Limitations section - - Target: Operations API section overview page - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 376: `[CLI Overview](./overview.md)` - - 
Context: See Also section - - Target: CLI overview page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 377: `[CLI Commands](./commands.md)` - - Context: See Also section - - Target: CLI commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 378: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md "Operations API overview")` - - Context: See Also section - - Target: Operations API section overview page - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 379: `[Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md "Operations reference")` - - Context: See Also section - - Target: Operations API section operations page - - **Status**: PENDING (will be created in Operations API section migration) - -- Line 380: `[CLI Authentication](./authentication.md)` - - Context: See Also section - - Target: CLI authentication page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -## reference_versioned_docs/version-v4/cli/authentication.md - -- Line 196: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles documentation")` - - Context: Linking to user management and permissions (Security Best Practices section) - - Target: Users and Roles section overview (moved to top-level section) - - **Status**: ~~RESOLVED~~ → `../users-and-roles/overview.md` - -- Line 204: `[Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md "Logging documentation")` - - Context: Linking to audit logging information (Security Best Practices section) - - Target: Logging section overview - - **Status**: ~~RESOLVED~~ → `../logging/overview.md` - -- Line 256: `[CLI Overview](./overview.md)` - - Context: See Also section - - Target: CLI overview page (within CLI section) - - **Status**: RESOLVED (file 
created in this migration) - -- Line 257: `[CLI Commands](./commands.md)` - - Context: See Also section - - Target: CLI commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 258: `[Operations API Commands](./operations-api-commands.md)` - - Context: See Also section - - Target: Operations API commands page (within CLI section) - - **Status**: RESOLVED (file created in this migration) - -- Line 259: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md "Security overview")` - - Context: See Also section - - Target: Security section overview page - - **Status**: ~~RESOLVED~~ → `../security/overview.md` - -- Line 260: `[Users and Roles](TODO:reference_versioned_docs/version-v4/security/users-and-roles.md "Users and roles")` - - Context: See Also section - - Target: Users and Roles section overview (moved to top-level section) - - **Status**: ~~RESOLVED~~ → `../users-and-roles/overview.md` - -## Summary - -### Resolved Links - -- 12 links to pages within the CLI section (resolved in initial migration) -- `../logging/overview.md` — resolved -- `../security/overview.md` — resolved (×2) -- `../users-and-roles/overview.md` — resolved (×2, was `security/users-and-roles.md`) -- `../graphql-querying/overview.md` — resolved (×1, in operations-api-commands.md) - -### Pending Links (Cross-Section References) - -**Operations API Section** (~82 links): - -- `reference_versioned_docs/version-v4/operations-api/overview.md` (5 occurrences) -- `reference_versioned_docs/version-v4/operations-api/operations.md` (2 occurrences) -- Operations table category pages (72 links): - - `../operations-api/database.md` - - `../operations-api/data.md` - - `../operations-api/security.md` - - `../operations-api/clustering.md` - - `../operations-api/components.md` - - `../operations-api/configuration.md` - - `../operations-api/authentication.md` - - `../operations-api/system.md` - - `../operations-api/licensing.md` - - 
`../operations-api/jobs.md` - - `../operations-api/logging.md` - - `../operations-api/maintenance.md` - - `../operations-api/status.md` - -**Configuration Section** (5 links): - -- `reference_versioned_docs/version-v4/configuration/overview.md` - -**Database Section** (3 links): - -- `reference_versioned_docs/version-v4/database/compaction.md` (2 occurrences) -- `reference_versioned_docs/version-v4/database/overview.md` (1 occurrence) - -**Components Section** (1 link): - -- `reference_versioned_docs/version-v4/components/overview.md` - -**REST Section** (1 link): - -- `reference_versioned_docs/version-v4/rest/overview.md` - -**Applications / Components Section** (1 link): - -- `reference_versioned_docs/version-v4/applications/overview.md` (note: this should likely be `components/overview.md`) - -**Learn Guides** (1 link): - -- Deploying Harper Applications guide (external learn link) - -**Total Pending Links**: ~89 diff --git a/migration-context/link-placeholders/components-link-placeholders.md b/migration-context/link-placeholders/components-link-placeholders.md deleted file mode 100644 index 442454fe..00000000 --- a/migration-context/link-placeholders/components-link-placeholders.md +++ /dev/null @@ -1,65 +0,0 @@ -# Link Placeholders for Components - -## reference_versioned_docs/version-v4/components/overview.md - -- Line (See Also section): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` - - Context: "See Also" section pointing to Resource API - - Target should be: `../resources/resource-api.md` - -- Line (See Also section): `[TODO:reference_versioned_docs/version-v4/database/schema.md]` - - Context: "See Also" section pointing to schema definition docs - - Target should be: `../database/schema.md` - -## reference_versioned_docs/version-v4/components/applications.md - -- Line (rest section): `[TODO:reference_versioned_docs/version-v4/rest/overview.md]` - - Context: Reference to REST interface documentation - - Target should be: 
`../rest/overview.md` - -- Line (graphqlSchema section): `[TODO:reference_versioned_docs/version-v4/database/schema.md]` - - Context: Reference to schema definition documentation - - Target should be: `../database/schema.md` - -- Line (jsResource section): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` - - Context: Reference to Resource API documentation - - Target should be: `../resources/resource-api.md` - -- Line (static section): `[TODO:reference_versioned_docs/version-v4/static-files/overview.md]` - - Context: Reference to static files documentation - - Target should be: `../static-files/overview.md` - -- Line (fastifyRoutes section): `[TODO:reference_versioned_docs/version-v4/fastify-routes/overview.md]` - - Context: Reference to Fastify routes documentation - - Target should be: `../fastify-routes/overview.md` - -- Line (graphql section): `[TODO:reference_versioned_docs/version-v4/graphql-querying/overview.md]` - - Context: Reference to GraphQL querying documentation - - Target should be: `../graphql-querying/overview.md` - -- Line (loadEnv section): `[TODO:reference_versioned_docs/version-v4/environment-variables/overview.md]` - - Context: Reference to environment variables documentation - - Target should be: `../environment-variables/overview.md` - -- Line (roles section): `[TODO:reference_versioned_docs/version-v4/users-and-roles/configuration.md]` - - Context: Reference to users and roles configuration - - Target should be: `../users-and-roles/configuration.md` - -- Line (dataLoader section): `[TODO:reference_versioned_docs/version-v4/database/data-loader.md]` - - Context: Reference to data loader documentation - - Target should be: `../database/data-loader.md` - -## reference_versioned_docs/version-v4/components/extension-api.md - -- Line (Protocol Extension section): `[TODO:reference_versioned_docs/version-v4/http/api.md]` - - Context: Reference to the `server` global API for custom networking - - Target should be: 
`../http/api.md` - -## reference_versioned_docs/version-v4/components/plugin-api.md - -- Line (`scope.resources`): `[TODO:reference_versioned_docs/version-v4/resources/resource-api.md]` - - Context: Reference to Resource class in scope.resources - - Target should be: `../resources/resource-api.md` - -- Line (`scope.server`): `[TODO:reference_versioned_docs/version-v4/http/api.md]` - - Context: Reference to HTTP server global API - - Target should be: `../http/api.md` diff --git a/migration-context/link-placeholders/configuration-link-placeholders.md b/migration-context/link-placeholders/configuration-link-placeholders.md deleted file mode 100644 index 113520bd..00000000 --- a/migration-context/link-placeholders/configuration-link-placeholders.md +++ /dev/null @@ -1,61 +0,0 @@ -# Link Placeholders for Configuration - -## reference_versioned_docs/version-v4/configuration/options.md - -- Line (http.mtls section): `[mTLS Authentication](TODO:reference_versioned_docs/version-v4/security/mtls-authentication.md)` - - Context: Referencing full mTLS authentication reference from HTTP mTLS config sub-section - - Target should be: `../security/mtls-authentication.md` - -- Line (http.mtls.certificateVerification): `[Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md)` - - Context: Referencing certificate revocation checking (CRL/OCSP) documentation - - Target should be: `../security/certificate-verification.md` - -- Line (tls section): `[TLS](TODO:reference_versioned_docs/version-v4/http/tls.md)` - - Context: Linking to TLS configuration reference in the HTTP section - - Target should be: `../http/tls.md` - -- Line (tls section): `[Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md)` - - Context: Linking to certificate management operations - - Target should be: `../security/certificate-management.md` - -- Line (mqtt section): `[MQTT 
Configuration](TODO:reference_versioned_docs/version-v4/mqtt/configuration.md)` - - Context: Referencing full MQTT configuration reference - - Target should be: `../mqtt/configuration.md` - -- Line (logging section): `[logger API](TODO:reference_versioned_docs/version-v4/logging/api.md)` - - Context: `logging.external` section referencing the logger API for components - - Target should be: `../logging/api.md` - -- Line (replication section): `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` - - Context: Referencing replication overview from replication config section - - Target should be: `../replication/overview.md` - -- Line (replication section): `[Clustering](TODO:reference_versioned_docs/version-v4/replication/clustering.md)` - - Context: Referencing clustering reference from replication config section - - Target should be: `../replication/clustering.md` - -- Line (replication.shard): `[Sharding](TODO:reference_versioned_docs/version-v4/replication/sharding.md)` - - Context: Referencing sharding documentation for `replication.shard` config option - - Target should be: `../replication/sharding.md` - -- Line (storage.compactOnStart): `[Compaction](TODO:reference_versioned_docs/version-v4/database/compaction.md)` - - Context: Referencing compaction documentation from `compactOnStart` storage option - - Target should be: `../database/compaction.md` - -- Line (localStudio section): `[Studio](TODO:reference_versioned_docs/version-v4/studio/overview.md)` - - Context: Referencing Studio overview from `localStudio` config section - - Target should be: `../studio/overview.md` - -- Line (Components section): `[Components](TODO:reference_versioned_docs/version-v4/components/overview.md)` - - Context: Referencing components overview from component config section - - Target should be: `../components/overview.md` - -## reference_versioned_docs/version-v4/configuration/operations.md - -- Line (set_configuration description): 
`[restart](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart)` - - Context: Linking to the restart system operation needed after set_configuration - - Target should be: `../operations-api/operations.md#restart` - -- Line (set_configuration description): `[restart_service](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart-service)` - - Context: Linking to the restart_service operation needed after set_configuration - - Target should be: `../operations-api/operations.md#restart-service` diff --git a/migration-context/link-placeholders/database-link-placeholders.md b/migration-context/link-placeholders/database-link-placeholders.md deleted file mode 100644 index 2d8a74f0..00000000 --- a/migration-context/link-placeholders/database-link-placeholders.md +++ /dev/null @@ -1,135 +0,0 @@ -# Link Placeholders for Database Section - -## reference_versioned_docs/version-v4/database/overview.md - -- Line ~37: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` - - Context: Mentioning custom resources as extension of the database system - - Target should be: Resource API reference page - -- Line ~55: `[REST](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - - Context: Related documentation footer - - Target should be: REST overview - -- Line ~56: `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md)` - - Context: Related documentation footer - - Target should be: Resources overview - -- Line ~57: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Related documentation footer - - Target should be: Operations API overview - -- Line ~58: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Related documentation footer - - Target should be: Configuration overview - -## reference_versioned_docs/version-v4/database/schema.md - -- Line ~164: `[REST 
Querying](TODO:reference_versioned_docs/version-v4/rest/querying.md)` - - Context: How to query tables via HTTP using schema-defined relationships - - Target should be: REST querying reference - -- Line ~165: `[Resources](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` - - Context: Extending table behavior with custom resource logic - - Target should be: Resource API reference - -- Line ~167: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` - - Context: graphqlSchema component and storage configuration - - Target should be: Configuration options page - -- Line ~141 (Dynamic Schema section): `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` - - Context: NoSQL create_attribute/drop_attribute operations - - Target should be: Operations list page - -## reference_versioned_docs/version-v4/database/data-loader.md - -- Line ~13: `[Extension](TODO:reference_versioned_docs/version-v4/components/extension-api.md)` - - Context: dataLoader is an Extension component - - Target should be: Extension API reference - -- Line ~73: `[Components](TODO:reference_versioned_docs/version-v4/components/overview.md)` - - Context: Related documentation footer - - Target should be: Components overview - -## reference_versioned_docs/version-v4/database/storage-algorithm.md - -- Line ~45: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` - - Context: Storage configuration options (compression settings) - - Target should be: Configuration options page (storage section) - -## reference_versioned_docs/version-v4/database/jobs.md - -- Line ~128: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Related documentation footer - - Target should be: Operations API overview - -## reference_versioned_docs/version-v4/database/system-tables.md - -- Line ~82: 
`[Analytics](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` - - Context: Full analytics metrics reference in related docs footer - - Target should be: Analytics overview - -- Line ~95: `[Replication](TODO:reference_versioned_docs/version-v4/replication/clustering.md)` - - Context: hdb_nodes used by clustering operations - - Target should be: Clustering reference - -- Line ~104: `[Analytics](TODO:reference_versioned_docs/version-v4/analytics/overview.md)` (second reference) - - Context: Related documentation footer - - Target should be: Analytics overview - -- Line ~105: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` - - Context: Related documentation footer - - Target should be: Replication overview - -- Line ~106: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Querying system tables - - Target should be: Operations API overview - -## reference_versioned_docs/version-v4/database/compaction.md - -- Line ~38: `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md)` - - Context: copy-db CLI command - - Target should be: CLI commands reference - -- Line ~56: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` - - Context: Storage configuration options - - Target should be: Configuration options page (storage section) - -## reference_versioned_docs/version-v4/database/api.md - -- Line ~20: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` - - Context: Noting that table classes implement the Resource API - - Target should be: Resource API reference page - -- Line ~48: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` (second reference) - - Context: Pointing to full table method reference - - Target should be: Resource API reference page - -- Line ~186: `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md)` (Related 
Documentation) - - Context: Related docs footer - - Target should be: Resource API reference page - -- Line ~188: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` - - Context: Blob storage path configuration - - Target should be: Configuration options page (storage section) - -## reference_versioned_docs/version-v4/database/transaction.md - -- Line ~73: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` - - Context: Clustering must be set up for transaction logs - - Target should be: Replication overview - -- Line ~148: `[Logging](TODO:reference_versioned_docs/version-v4/logging/overview.md)` - - Context: Distinction between app logging and transaction/audit logging - - Target should be: Logging overview - -- Line ~149: `[Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md)` (second reference) - - Context: Related documentation footer - - Target should be: Replication overview - -- Line ~150: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md)` - - Context: logging.auditLog global configuration - - Target should be: Configuration options page - -- Line ~151: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Related documentation footer - - Target should be: Operations API overview diff --git a/migration-context/link-placeholders/environment-variables-link-placeholders.md b/migration-context/link-placeholders/environment-variables-link-placeholders.md deleted file mode 100644 index 8943b2ec..00000000 --- a/migration-context/link-placeholders/environment-variables-link-placeholders.md +++ /dev/null @@ -1,19 +0,0 @@ -# Link Placeholders for Environment Variables - -## reference_versioned_docs/version-v4/environment-variables/overview.md - -- Line 14: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Intro list item — "Harper configuration via 
environment variables — see Configuration" - - Target should be: Configuration section overview page - -- Line 67: `[Resource Extension](TODO:reference_versioned_docs/version-v4/components/overview.md)` - - Context: Describing that `loadEnv` is a Resource Extension supporting standard `files`/`urlPath` config options - - Target should be: Components overview page (covering the Resource Extension concept) - -- Line 79: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md)` - - Context: "Related" section - - Target should be: Main components/extensions reference page - -- Line 80: `[Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: "Related" section — linking to where env var config details live - - Target should be: Configuration section overview page diff --git a/migration-context/link-placeholders/fastify-routes-link-placeholders.md b/migration-context/link-placeholders/fastify-routes-link-placeholders.md deleted file mode 100644 index 1abcd32f..00000000 --- a/migration-context/link-placeholders/fastify-routes-link-placeholders.md +++ /dev/null @@ -1,11 +0,0 @@ -# Link Placeholders for Fastify Routes - -## reference_versioned_docs/version-v4/fastify-routes/overview.md - -- Line 10: `[Custom Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md "Resources documentation")` - - Context: Explaining that Fastify routes are discouraged in favor of modern routing with Custom Resources - - Target should be: Resources overview page - -- Line 9: `[REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md "REST interface documentation")` - - Context: Recommending REST interface for better performance and standards compliance - - Target should be: REST overview page diff --git a/migration-context/link-placeholders/graphql-querying-link-placeholders.md b/migration-context/link-placeholders/graphql-querying-link-placeholders.md deleted file mode 100644 index 
794837f6..00000000 --- a/migration-context/link-placeholders/graphql-querying-link-placeholders.md +++ /dev/null @@ -1,14 +0,0 @@ -# Link Placeholders for GraphQL Querying - -## reference_versioned_docs/version-v4/graphql-querying/overview.md - -- Line 17: `[defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md "Schema definition documentation")` - - Context: Introduction explaining GraphQL support for defining schemas - - Target should be: Schema definition documentation in Components/Applications section - - **Status**: PENDING (Components section migration) - -- ~~Line 17: `[Resources](./resources/overview.md)`~~ **FIXED** → `TODO:reference_versioned_docs/version-v4/resources/overview.md` (was a broken relative path — resources is a sibling section, not a subdirectory) - - **Status**: PENDING (Resources section migration) - -- ~~Line 58: `[Resource Query API](./resources/overview.md#query)`~~ **FIXED** → `TODO:reference_versioned_docs/version-v4/resources/overview.md#query` (was a broken relative path) - - **Status**: PENDING (Resources section migration) diff --git a/migration-context/link-placeholders/http-link-placeholders.md b/migration-context/link-placeholders/http-link-placeholders.md deleted file mode 100644 index 9227efd1..00000000 --- a/migration-context/link-placeholders/http-link-placeholders.md +++ /dev/null @@ -1,48 +0,0 @@ -# Link Placeholders for HTTP Section - -## reference_versioned_docs/version-v4/http/tls.md - -- Line (intro + body): `[Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md)` - - Context: Noting that operationsApi.tls overrides the root tls section (appears twice) - - Target should be: Configuration section operations.md page - - **Status**: PENDING (Configuration section migration) - -- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **NOTE**: Not present in file — Related section links 
to `../security/mtls-authentication.md` which was already resolved. - -## reference_versioned_docs/version-v4/http/overview.md - -- ~~Line (TLS section): `[Security](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` -- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - -- Line (Related section): `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - - Context: Related reference for REST protocol - - Target should be: REST section overview page - - **Status**: PENDING (REST section migration) - -## reference_versioned_docs/version-v4/http/configuration.md - -- ~~Line (Related section): `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - -- Line (Related section): `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Full configuration reference - - Target should be: Configuration section overview page - - **Status**: PENDING (Configuration section migration) - -## reference_versioned_docs/version-v4/http/api.md - -- ~~Line (server.recordAnalytics): `[analytics API](TODO:reference_versioned_docs/version-v4/analytics/overview.md)`~~ **RESOLVED** → `../analytics/overview.md` - -- Line (server.operation): `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Link to operations API overview - - Target should be: Operations API overview page - - **Status**: PENDING (Operations API section migration) - -- Line (Related section): `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - - Context: Related reference for REST protocol - - Target should be: REST section overview page - - **Status**: PENDING (REST section migration) - -- Line (Related section): `[Global 
APIs](TODO:reference_versioned_docs/version-v4/resources/global-apis.md)` - - Context: Full global API reference including tables, databases, Resource, logger, auth - - Target should be: Resources global-apis page - - **Status**: PENDING (Resources section migration) diff --git a/migration-context/link-placeholders/logging-link-placeholders.md b/migration-context/link-placeholders/logging-link-placeholders.md deleted file mode 100644 index 9bd31e66..00000000 --- a/migration-context/link-placeholders/logging-link-placeholders.md +++ /dev/null @@ -1,34 +0,0 @@ -# Link Placeholders for Logging Section - -## reference_versioned_docs/version-v4/logging/overview.md - -- Line 14 + 62: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: Noting that audit logging and transaction logging are documented in the database section - - Target should be: Database transaction/audit logging page - - **Status**: PENDING (Database section migration) - -## reference_versioned_docs/version-v4/logging/configuration.md - -- Line 72 + 192: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: After describing logging.auditLog / related section - - Target should be: Database transaction/audit logging page - - **Status**: PENDING (Database section migration) - -- ~~Line 133: `[HTTP Configuration](TODO:reference_versioned_docs/version-v4/http/configuration.md)`~~ **RESOLVED** → `../http/configuration.md` - -- Line 193: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Related section — full config reference - - Target should be: Configuration section overview page - - **Status**: PENDING (Configuration section migration) - -## reference_versioned_docs/version-v4/logging/operations.md - -- Line 9 + 76: `[Database / Transaction Logging](TODO:reference_versioned_docs/version-v4/database/transaction.md)` - - Context: 
Callout + related section noting audit/transaction log operations are in the database section - - Target should be: Database transaction/audit logging page - - **Status**: PENDING (Database section migration) - -- Line 77: `[Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md)` - - Context: Related section — operations API overview - - Target should be: Operations API section overview page - - **Status**: PENDING (Operations API section migration) diff --git a/migration-context/link-placeholders/mqtt-link-placeholders.md b/migration-context/link-placeholders/mqtt-link-placeholders.md deleted file mode 100644 index 5c4a9e3a..00000000 --- a/migration-context/link-placeholders/mqtt-link-placeholders.md +++ /dev/null @@ -1,32 +0,0 @@ -# Link Placeholders for MQTT Section - -## reference_versioned_docs/version-v4/mqtt/overview.md - -- Line 28: `[schema.graphql](TODO:reference_versioned_docs/version-v4/database/schema.md)` - - Context: Explaining how to define a table that becomes an MQTT topic namespace - - Target should be: Schema definition reference page (database section) - - **Status**: PENDING (Database section migration) - -- ~~Line 101: `[HTTP Overview](TODO:reference_versioned_docs/version-v4/http/overview.md)`~~ **RESOLVED** → `../http/overview.md` -- ~~Line 103: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - -- Line 104: `[Database Schema](TODO:reference_versioned_docs/version-v4/database/schema.md)` - - Context: Related section — defining tables/topics - - Target should be: Database schema reference page - - **Status**: PENDING (Database section migration) - -- Line 105: `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - - Context: Related section — noting REST and MQTT share the same path conventions - - Target should be: REST section overview page - - **Status**: PENDING (REST section migration) - -## 
reference_versioned_docs/version-v4/mqtt/configuration.md - -- ~~Line 20: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)`~~ **RESOLVED** → `../http/tls.md` -- ~~Line 133: `[TLS Configuration](TODO:reference_versioned_docs/version-v4/http/tls.md)`~~ **RESOLVED** → `../http/tls.md` -- ~~Line 134: `[Security Overview](TODO:reference_versioned_docs/version-v4/security/overview.md)`~~ **RESOLVED** → `../security/overview.md` - -- Line 135: `[Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md)` - - Context: Related section — full harperdb-config.yaml reference - - Target should be: Configuration section overview page - - **Status**: PENDING (Configuration section migration) diff --git a/migration-context/link-placeholders/operations-api-link-placeholders.md b/migration-context/link-placeholders/operations-api-link-placeholders.md deleted file mode 100644 index fb679c8b..00000000 --- a/migration-context/link-placeholders/operations-api-link-placeholders.md +++ /dev/null @@ -1,131 +0,0 @@ -# Link Placeholders for Operations API - -## reference_versioned_docs/version-v4/operations-api/overview.md - -- Line 18: `[TODO:reference_versioned_docs/version-v4/configuration/overview.md]` - - Context: Describing how to change the Operations API port via configuration - - Target should be: Configuration overview page (operationsApi.network section) - -- Line 24: `[Basic Authentication](TODO:reference_versioned_docs/version-v4/security/basic-authentication.md)` - - Context: Listing supported authentication methods for Operations API - - Target should be: Basic authentication reference page - -- Line 25: `[JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md)` - - Context: Listing supported authentication methods for Operations API - - Target should be: JWT authentication reference page - -- Line 41: `[Operations](./operations.md)` — **Internal link; already resolved** - -- Table 
row: `[Databases & Tables](./operations.md#databases--tables)` — **Internal link; already resolved** - -- Table row: `[NoSQL Operations](./operations.md#nosql-operations)` — **Internal link; already resolved** - -- Table row: `[Bulk Operations](./operations.md#bulk-operations)` — **Internal link; already resolved** - -- Table row: `[SQL Operations](./operations.md#sql-operations)` — **Internal link; already resolved** - -- Table row: `[Users & Roles](./operations.md#users--roles)` — **Internal link; already resolved** - -- Table row: `[Token Authentication](./operations.md#token-authentication)` — **Internal link; already resolved** - -- Table row: `[Components](./operations.md#components)` — **Internal link; already resolved** - -- Table row: `[Replication & Clustering](./operations.md#replication--clustering)` — **Internal link; already resolved** - -- Table row: `[Configuration](./operations.md#configuration)` — **Internal link; already resolved** - -- Table row: `[Jobs](./operations.md#jobs)` — **Internal link; already resolved** - -- Table row: `[Logs](./operations.md#logs)` — **Internal link; already resolved** - -- Table row: `[Certificate Management](./operations.md#certificate-management)` — **Internal link; already resolved** - -- Table row: `[Analytics](./operations.md#analytics)` — **Internal link; already resolved** - -- Table row: `[Registration & Licensing](./operations.md#registration--licensing)` — **Internal link; already resolved** - ---- - -## reference_versioned_docs/version-v4/operations-api/operations.md - -### Databases & Tables section - -- Line (Databases & Tables description): `[TODO:reference_versioned_docs/version-v4/database/overview.md]` - - Context: "Detailed documentation" link at top of Databases & Tables section - - Target should be: Database overview page - -### NoSQL Operations section - -- Line (NoSQL Operations description): `[TODO:reference_versioned_docs/version-v4/rest/querying.md]` - - Context: "Detailed documentation" 
link at top of NoSQL Operations section - - Target should be: REST querying reference (also covers NoSQL query patterns) - -### Bulk Operations section - -- Line (Bulk Operations description): `[TODO:reference_versioned_docs/version-v4/database/jobs.md]` - - Context: "Detailed documentation" link at top of Bulk Operations section - - Target should be: Database jobs reference page - -### SQL Operations section - -- Line (SQL Operations description): `[TODO:reference_versioned_docs/version-v4/legacy/sql]` - - Context: "Detailed documentation" link at top of SQL Operations section - - Target should be: Legacy SQL guide section - -### Users & Roles section - -- Two instances: `[TODO:reference_versioned_docs/version-v4/users-and-roles/operations.md]` - - Context: "Detailed documentation" link and inline "See" link in Users & Roles section - - Target should be: Users & Roles operations page - -### Token Authentication section - -- Line (Token Authentication description): `[TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md]` - - Context: "Detailed documentation" link at top of Token Authentication section - - Target should be: JWT authentication reference page - -### Components section - -- Line (Components description): `[TODO:reference_versioned_docs/version-v4/components/overview.md]` - - Context: "Detailed documentation" link at top of Components section - - Target should be: Components overview page - -- Line (Deprecated Custom Functions): `[TODO:reference_versioned_docs/version-v4/components/overview.md]` - - Context: Referring readers from deprecated custom-functions ops to modern equivalent - - Target should be: Components overview page - -### Replication & Clustering section - -- Line (Replication description): `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` - - Context: "Detailed documentation" link at top of Replication & Clustering section - - Target should be: Replication clustering reference page - -### 
Configuration section - -- Two instances: `[TODO:reference_versioned_docs/version-v4/configuration/overview.md]` - - Context: "Detailed documentation" link at top of Configuration section and inline reference - - Target should be: Configuration overview page - -### Jobs section - -- Two instances: `[TODO:reference_versioned_docs/version-v4/database/jobs.md]` - - Context: "Detailed documentation" link at top of Jobs section and inline reference - - Target should be: Database jobs reference page - -### Logs section - -- Line (Logs description): `[TODO:reference_versioned_docs/version-v4/logging/operations.md]` - - Context: "Detailed documentation" link at top of Logs section - - Target should be: Logging operations page - -### Certificate Management section - -- Line (Certificate Management description): `[TODO:reference_versioned_docs/version-v4/security/certificate-management.md]` - - Context: "Detailed documentation" link at top of Certificate Management section - - Target should be: Security certificate management page - -### Analytics section - -- Line (Analytics description): `[TODO:reference_versioned_docs/version-v4/analytics/operations.md]` - - Context: "Detailed documentation" link at top of Analytics section - - Target should be: Analytics operations page diff --git a/migration-context/link-placeholders/replication-link-placeholders.md b/migration-context/link-placeholders/replication-link-placeholders.md deleted file mode 100644 index 1208c8a6..00000000 --- a/migration-context/link-placeholders/replication-link-placeholders.md +++ /dev/null @@ -1,11 +0,0 @@ -# Link Placeholders for Replication - -## reference_versioned_docs/version-v4/replication/overview.md - -- Line ~84: `[Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md)` - - Context: Describing certificate revocation checking for replication connections — links to the cert verification config page for OCSP/CRL settings - - Target should be: 
`../security/certificate-verification.md` - -- Line ~183: `[Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md)` - - Context: "See Also" section footer link - - Target should be: `../security/certificate-management.md` diff --git a/migration-context/link-placeholders/resources-link-placeholders.md b/migration-context/link-placeholders/resources-link-placeholders.md deleted file mode 100644 index 02f673ac..00000000 --- a/migration-context/link-placeholders/resources-link-placeholders.md +++ /dev/null @@ -1,21 +0,0 @@ -# Link Placeholders for Resources - -## reference_versioned_docs/version-v4/resources/overview.md - -- Line 27: `[REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md)` - - Context: Explaining that the REST plugin maps HTTP requests to Resource methods - - Target should be: REST section overview page - -- Line 28: `[MQTT Overview](TODO:reference_versioned_docs/version-v4/mqtt/overview.md)` - - Context: Explaining that the MQTT plugin routes publish/subscribe to Resource methods - - Target should be: MQTT section overview page - -## reference_versioned_docs/version-v4/resources/resource-api.md - -No TODO placeholders — all cross-section links use relative paths for in-section links and section-relative paths otherwise. The global-apis.md has the external TODOs listed above. 
- -## reference_versioned_docs/version-v4/resources/query-optimization.md - -- Line 57: `[Schema documentation](TODO:reference_versioned_docs/version-v4/database/schema.md)` - - Context: Pointing to where relationship directives are defined in schemas - - Target should be: Database section schema page (not yet merged as of this PR) diff --git a/migration-context/link-placeholders/rest-link-placeholders.md b/migration-context/link-placeholders/rest-link-placeholders.md deleted file mode 100644 index 9d02b15b..00000000 --- a/migration-context/link-placeholders/rest-link-placeholders.md +++ /dev/null @@ -1,39 +0,0 @@ -# Link Placeholders for REST Section - -## reference_versioned_docs/version-v4/rest/overview.md - -- Line (See Also): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition')` - - Context: Intro and See Also section — how to define and export resources - - Target should be: Database / Schema page - - **Status**: PENDING (Database section migration) - -## reference_versioned_docs/version-v4/rest/querying.md - -- Line (directURLMapping section): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema and resource configuration')` - - Context: directURLMapping option reference - - Target should be: Database / Schema page - - **Status**: PENDING (Database section migration) - -- Line (See Also): `[Database / Schema](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition')` - - Context: See Also section - - Target should be: Database / Schema page - - **Status**: PENDING (Database section migration) - -## reference_versioned_docs/version-v4/rest/websockets.md - -- Line (Custom connect() Handler): `[Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference')` - - Context: Inline link for more on implementing custom resources - - Target should be: Resources / Resource API page - - **Status**: PENDING (Resources 
section migration) - -- Line (See Also): `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview')` - - Context: Link to custom resource API including `connect()` method - - Target should be: Resources section overview page - - **Status**: PENDING (Resources section migration) - -## reference_versioned_docs/version-v4/rest/server-sent-events.md - -- Line (See Also): `[Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview')` - - Context: Link to custom resource API including `connect()` method - - Target should be: Resources section overview page - - **Status**: PENDING (Resources section migration) diff --git a/migration-context/link-placeholders/security-link-placeholders.md b/migration-context/link-placeholders/security-link-placeholders.md deleted file mode 100644 index 2ba0604e..00000000 --- a/migration-context/link-placeholders/security-link-placeholders.md +++ /dev/null @@ -1,31 +0,0 @@ -# Link Placeholders for Security Section - -## reference_versioned_docs/version-v4/security/mtls-authentication.md - -- Line 47: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` - - Context: Referring to replication mTLS configuration - - Target should be: Replication clustering page that covers mTLS for replication - -## reference_versioned_docs/version-v4/security/certificate-management.md - -- Line 8: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` - - Context: Note that this page covers external-facing APIs; replication certs are covered separately - - Target should be: Replication clustering page with certificate management section - -- ~~Line 105: `[TODO:reference_versioned_docs/version-v4/cli/commands.md]`~~ **RESOLVED** → `../cli/commands.md` - -## reference_versioned_docs/version-v4/security/certificate-verification.md - -- Line 190: `[TODO:reference_versioned_docs/version-v4/replication/clustering.md]` - - Context: Replication mTLS 
configuration reference - - Target should be: Replication clustering page - -## reference_versioned_docs/version-v4/security/configuration.md - -- No pending TODO links. (`cors.md` and `ssl.md` were consolidated into this file; their cross-section links were resolved during initial migration.) - -## reference_versioned_docs/version-v4/security/overview.md - -- ~~`./users-and-roles.md`~~ **FIXED** → `../users-and-roles/overview.md` (users-and-roles is now a top-level section, not a file within security/) -- `TODO:reference_versioned_docs/version-v4/configuration/operations.md#network` — PENDING (Configuration section migration) -- `TODO:reference_versioned_docs/version-v4/configuration/operations.md#tls` — PENDING (Configuration section migration) diff --git a/migration-context/link-placeholders/static-files-link-placeholders.md b/migration-context/link-placeholders/static-files-link-placeholders.md deleted file mode 100644 index 9bbb31bf..00000000 --- a/migration-context/link-placeholders/static-files-link-placeholders.md +++ /dev/null @@ -1,15 +0,0 @@ -# Link Placeholders for Static Files - -## reference_versioned_docs/version-v4/static-files/overview.md - -- Line 41: `[Plugin](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview')` - - Context: Describing that `static` is a Plugin supporting standard `files`/`urlPath` config options - - Target should be: Components overview page (covering the Plugin concept and standard options) - -- Line 46: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — files and urlPath options')` - - Context: Directing the reader to full `files` glob pattern and `urlPath` documentation - - Target should be: Components overview page - -- Line 114: `[Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — including 
files/urlPath options')` - - Context: "Related" section - - Target should be: Main components/plugins reference page diff --git a/migration-context/link-placeholders/studio-link-placeholders.md b/migration-context/link-placeholders/studio-link-placeholders.md deleted file mode 100644 index 9f262e1b..00000000 --- a/migration-context/link-placeholders/studio-link-placeholders.md +++ /dev/null @@ -1,11 +0,0 @@ -# Link Placeholders for Studio - -## reference_versioned_docs/version-v4/studio/overview.md - -- Line 20: `[configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio "Configuration options")` - - Context: Explaining how to enable local Studio via configuration - - Target should be: Configuration options page localStudio section - - **Status**: PENDING (Configuration section migration) - -- ~~Line 27: `[Operations API](TODO:reference_versioned_docs/version-v4/operations/configuration.md)`~~ **FIXED path** → `TODO:reference_versioned_docs/version-v4/operations-api/overview.md` (was wrong path — `operations` → `operations-api`) - - **Status**: PENDING (Operations API section migration) diff --git a/reference_versioned_docs/version-v4/analytics/operations.md b/reference_versioned_docs/version-v4/analytics/operations.md index c5449b04..e82d3086 100644 --- a/reference_versioned_docs/version-v4/analytics/operations.md +++ b/reference_versioned_docs/version-v4/analytics/operations.md @@ -83,14 +83,14 @@ Queries analytics data for a specific metric over a time range. 
### Parameters -| Parameter | Required | Type | Description | -| ---------------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `operation` | Yes | string | Must be `"get_analytics"` | -| `metric` | Yes | string | Metric name — use `list_metrics` to get valid values | -| `start_time` | No | number | Start of time range as Unix timestamp in milliseconds | -| `end_time` | No | number | End of time range as Unix timestamp in milliseconds | -| `get_attributes` | No | string[] | Attributes to include in each result. If omitted, all attributes are returned | -| `conditions` | No | object[] | Additional filter conditions. Same format as [`search_by_conditions`](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Operations API — search_by_conditions') | +| Parameter | Required | Type | Description | +| ---------------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------------------- | +| `operation` | Yes | string | Must be `"get_analytics"` | +| `metric` | Yes | string | Metric name — use `list_metrics` to get valid values | +| `start_time` | No | number | Start of time range as Unix timestamp in milliseconds | +| `end_time` | No | number | End of time range as Unix timestamp in milliseconds | +| `get_attributes` | No | string[] | Attributes to include in each result. If omitted, all attributes are returned | +| `conditions` | No | object[] | Additional filter conditions. Same format as [`search_by_conditions`](../operations-api/operations.md#nosql-operations) | ### Request @@ -133,4 +133,4 @@ Queries analytics data for a specific metric over a time range. 
## Related - [Analytics Overview](./overview) -- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Full Operations API reference') +- [Operations API Overview](../operations-api/overview.md) diff --git a/reference_versioned_docs/version-v4/analytics/overview.md b/reference_versioned_docs/version-v4/analytics/overview.md index 6dd7a4f2..679e70f4 100644 --- a/reference_versioned_docs/version-v4/analytics/overview.md +++ b/reference_versioned_docs/version-v4/analytics/overview.md @@ -195,7 +195,7 @@ Applications can record custom metrics using the `server.recordAnalytics()` API. ## Analytics Configuration -The `analytics.aggregatePeriod` configuration option controls how frequently aggregate summaries are written. See [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') for details. +The `analytics.aggregatePeriod` configuration option controls how frequently aggregate summaries are written. See [Configuration Overview](../configuration/overview.md) for details. Per-component analytics logging can be configured via `analytics.logging`. See [Logging Configuration](../logging/configuration.md) for details. @@ -204,4 +204,4 @@ Per-component analytics logging can be configured via `analytics.logging`. 
See [ - [Analytics Operations](./operations) - [HTTP API](../http/api.md) - [Logging Configuration](../logging/configuration.md) -- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') +- [Configuration Overview](../configuration/overview.md) diff --git a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md index 7143f0a2..3f1fdc94 100644 --- a/reference_versioned_docs/version-v4/cli/commands.md +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -58,7 +58,7 @@ harper \ --ROOTPATH='/hdb' ``` -**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview') for a full list of configuration parameters. +**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](../configuration/overview.md) for a full list of configuration parameters. :::info For more information on installation, see [Getting Started / Install and Connect Harper](/learn/getting-started/install-and-connect-harper). @@ -265,5 +265,5 @@ The CLI supports executing commands on remote Harper instances. 
For details, see - [CLI Overview](./overview.md) - General CLI information - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI - [CLI Authentication](./authentication.md) - Authentication mechanisms -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration') - Configuration parameters for installation +- [Configuration](../configuration/overview.md) - Configuration parameters for installation - [Database Compaction](../database/compaction.md) - More on database compaction diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index 98bf0446..bdfd49f5 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -9,7 +9,7 @@ title: Operations API Commands Added in: v4.3.0 -The Harper CLI supports executing operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. This enables powerful automation and scripting capabilities. +The Harper CLI supports executing operations from the [Operations API](../operations-api/overview.md) directly from the command line. This enables powerful automation and scripting capabilities. ## General Syntax @@ -34,80 +34,80 @@ The following operations are available through the CLI. Operations that require This is just a brief overview of all operations available as CLI commands. Review the respective operation documentation for more information on available arguments and expected behavior. Keep in mind that all operations options are converted to CLI arguments in the same way (using `snake_case`). 
::: -| Operation | Description | Category | Available Since | -| -------------------------------- | ------------------------------------- | ---------------------------------------------------------- | --------------- | -| `describe_table` | Describe table structure and metadata | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `describe_all` | Describe all databases and tables | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `describe_database` | Describe database structure | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_database` | Create a new database | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_database` | Delete a database | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_table` | Create a new table | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_table` | Delete a table | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `create_attribute` | Create a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `drop_attribute` | Delete a table attribute | [Database](TODO:../operations-api/database.md) | v4.3.0 | -| `search_by_id` | Search records by ID | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `search_by_value` | Search records by attribute value | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `insert` | Insert new records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `update` | Update existing records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `upsert` | Insert or update records | [Data](TODO:../operations-api/data.md) | v4.4.9 | -| `delete` | Delete records | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `sql` | Execute SQL queries | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `csv_file_load` | Load data from CSV file | [Data](TODO:../operations-api/data.md) | v4.3.0 | -| `csv_url_load` | Load data from CSV URL | [Data](TODO:../operations-api/data.md) | v4.3.0 
| -| `list_users` | List all users | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `add_user` | Create a new user | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `alter_user` | Modify user properties | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `drop_user` | Delete a user | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `list_roles` | List all roles | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `drop_role` | Delete a role | [Security](TODO:../operations-api/security.md) | v4.3.0 | -| `create_csr` | Create certificate signing request | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `sign_certificate` | Sign a certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `list_certificates` | List SSL/TLS certificates | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `add_certificate` | Add SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `remove_certificate` | Remove SSL/TLS certificate | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `add_ssh_key` | Add SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `get_ssh_key` | Get SSH key | [Security](TODO:../operations-api/security.md) | v4.7.2 | -| `update_ssh_key` | Update SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `delete_ssh_key` | Delete SSH key | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `list_ssh_keys` | List all SSH keys | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `set_ssh_known_hosts` | Set SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `get_ssh_known_hosts` | Get SSH known hosts | [Security](TODO:../operations-api/security.md) | v4.4.0 | -| `cluster_get_routes` | Get cluster routing information | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `cluster_network` | Get cluster network status | 
[Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `cluster_status` | Get cluster status | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `remove_node` | Remove node from cluster | [Clustering](TODO:../operations-api/clustering.md) | v4.3.0 | -| `add_component` | Add a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `deploy_component` | Deploy a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `deploy` (alias) | Alias for `deploy_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `package_component` | Package a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `package` (alias) | Alias for `package_component` | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `drop_component` | Remove a component | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `get_components` | List all components | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `get_component_file` | Get component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `set_component_file` | Set component file contents | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `install_node_modules` | Install Node.js dependencies | [Components](TODO:../operations-api/components.md) | v4.3.0 | -| `set_configuration` | Update configuration settings | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | -| `get_configuration` | Get current configuration | [Configuration](TODO:../operations-api/configuration.md) | v4.3.0 | -| `create_authentication_tokens` | Create authentication tokens | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | -| `refresh_operation_token` | Refresh operation token | [Authentication](TODO:../operations-api/authentication.md) | v4.3.0 | -| `restart_service` | Restart Harper service | [System](TODO:../operations-api/system.md) | 
v4.3.0 | -| `restart` | Restart Harper instance | [System](TODO:../operations-api/system.md) | v4.3.0 | -| `system_information` | Get system information | [System](TODO:../operations-api/system.md) | v4.3.0 | -| `registration_info` | Get registration information | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `get_fingerprint` | Get instance fingerprint | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `set_license` | Set license key | [Licensing](TODO:../operations-api/licensing.md) | v4.3.0 | -| `get_usage_licenses` | Get usage and license info | [Licensing](TODO:../operations-api/licensing.md) | v4.7.3 | -| `get_job` | Get job status | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | -| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](TODO:../operations-api/jobs.md) | v4.3.0 | -| `read_log` | Read application logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `read_transaction_log` | Read transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `read_audit_log` | Read audit logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `delete_transaction_logs_before` | Delete old transaction logs | [Logging](TODO:../operations-api/logging.md) | v4.3.0 | -| `purge_stream` | Purge streaming data | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | -| `delete_records_before` | Delete old records | [Maintenance](TODO:../operations-api/maintenance.md) | v4.3.0 | -| `get_status` | Get custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | -| `set_status` | Set custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | -| `clear_status` | Clear custom status information | [Status](TODO:../operations-api/status.md) | v4.6.0 | +| Operation | Description | Category | Available Since | +| -------------------------------- | ------------------------------------- | 
---------------------------------------------------------------------- | --------------- | +| `describe_table` | Describe table structure and metadata | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `describe_all` | Describe all databases and tables | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `describe_database` | Describe database structure | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_database` | Create a new database | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_database` | Delete a database | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_table` | Create a new table | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_table` | Delete a table | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_attribute` | Create a table attribute | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_attribute` | Delete a table attribute | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `search_by_id` | Search records by ID | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `search_by_value` | Search records by attribute value | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `insert` | Insert new records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `update` | Update existing records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `upsert` | Insert or update records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `delete` | Delete records | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `sql` | Execute SQL queries | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `csv_file_load` | Load data from CSV file | 
[Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `csv_url_load` | Load data from CSV URL | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `list_users` | List all users | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `add_user` | Create a new user | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `alter_user` | Modify user properties | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `drop_user` | Delete a user | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `list_roles` | List all roles | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `drop_role` | Delete a role | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `create_csr` | Create certificate signing request | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `sign_certificate` | Sign a certificate | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `list_certificates` | List SSL/TLS certificates | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `add_certificate` | Add SSL/TLS certificate | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `remove_certificate` | Remove SSL/TLS certificate | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `add_ssh_key` | Add SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `get_ssh_key` | Get SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.7.2 | +| `update_ssh_key` | Update SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `delete_ssh_key` | Delete SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `list_ssh_keys` | List all 
SSH keys | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `set_ssh_known_hosts` | Set SSH known hosts | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `get_ssh_known_hosts` | Get SSH known hosts | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `cluster_get_routes` | Get cluster routing information | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `cluster_network` | Get cluster network status | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `cluster_status` | Get cluster status | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `remove_node` | Remove node from cluster | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `add_component` | Add a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `deploy_component` | Deploy a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `deploy` (alias) | Alias for `deploy_component` | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `package_component` | Package a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `package` (alias) | Alias for `package_component` | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `drop_component` | Remove a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `get_components` | List all components | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `get_component_file` | Get component file contents | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `set_component_file` | Set component file contents | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `install_node_modules` | Install Node.js dependencies | 
[Components](../operations-api/operations.md#components) | v4.3.0 | +| `set_configuration` | Update configuration settings | [Configuration](../operations-api/operations.md#configuration) | v4.3.0 | +| `get_configuration` | Get current configuration | [Configuration](../operations-api/operations.md#configuration) | v4.3.0 | +| `create_authentication_tokens` | Create authentication tokens | [Authentication](../operations-api/operations.md#token-authentication) | v4.3.0 | +| `refresh_operation_token` | Refresh operation token | [Authentication](../operations-api/operations.md#token-authentication) | v4.3.0 | +| `restart_service` | Restart Harper service | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `restart` | Restart Harper instance | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `system_information` | Get system information | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `registration_info` | Get registration information | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `get_fingerprint` | Get instance fingerprint | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `set_license` | Set license key | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `get_usage_licenses` | Get usage and license info | [Licensing](../operations-api/operations.md#registration--licensing) | v4.7.3 | +| `get_job` | Get job status | [Jobs](../operations-api/operations.md#jobs) | v4.3.0 | +| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](../operations-api/operations.md#jobs) | v4.3.0 | +| `read_log` | Read application logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `read_transaction_log` | Read transaction logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `read_audit_log` | Read audit logs | 
[Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `delete_transaction_logs_before` | Delete old transaction logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `purge_stream` | Purge streaming data | [Maintenance](../operations-api/operations.md#jobs) | v4.3.0 | +| `delete_records_before` | Delete old records | [Maintenance](../operations-api/operations.md#jobs) | v4.3.0 | +| `get_status` | Get custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | +| `set_status` | Set custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | +| `clear_status` | Clear custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | ### Command Aliases @@ -116,7 +116,7 @@ The following aliases are available for convenience: - `deploy` → `deploy_component` - `package` → `package_component` -For detailed parameter information for each operation, see the [Operations API documentation](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Complete operations list'). +For detailed parameter information for each operation, see the [Operations API documentation](../operations-api/operations.md). ## Command Examples @@ -171,7 +171,7 @@ harper search_by_value table=dog search_attribute=name search_value=harper get_a ``` :::tip -For more information on querying data, see the [REST Reference](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST API reference') and [GraphQL Querying](../graphql-querying/overview.md). +For more information on querying data, see the [REST Reference](../rest/overview.md) and [GraphQL Querying](../graphql-querying/overview.md). 
::: ### Configuration Operations @@ -189,7 +189,7 @@ harper get_configuration ``` :::tip -For comprehensive configuration options, see the [Configuration Reference](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration reference'). +For comprehensive configuration options, see the [Configuration Reference](../configuration/overview.md). ::: ### Component Operations @@ -213,7 +213,7 @@ harper deploy project=my-app package=https://github.com/user/repo ``` :::tip -For more information on components and applications, see the [Components Reference](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components reference'). +For more information on components and applications, see the [Components Reference](../components/overview.md). ::: ### User and Role Operations @@ -270,8 +270,8 @@ harper restart target=https://server.com:9925 replicated=true For more information on Harper applications and components, see: -- [Applications](TODO:reference_versioned_docs/version-v4/applications/overview.md 'Applications reference') - Application architecture and structure -- [Deploying Harper Applications](TODO:learn_link 'Deploying applications guide') - Step-by-step deployment guide +- [Components](../components/overview.md) - Application architecture and structure +- [Deploying Harper Applications](/learn/getting-started/install-and-connect-harper) - Step-by-step deployment guide ## Parameter Formatting @@ -295,7 +295,7 @@ harper search_by_id database=dev table=dog ids='["1","2","3"]' Object parameters are not supported via CLI. 
For operations requiring complex nested objects, use: -- The [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') via HTTP +- The [Operations API](../operations-api/overview.md) via HTTP - A custom script or tool ### Boolean Parameters @@ -369,12 +369,12 @@ The following operation types are **not supported** via CLI: - File upload operations - Streaming operations -For these operations, use the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') directly via HTTP. +For these operations, use the [Operations API](../operations-api/overview.md) directly via HTTP. ## See Also - [CLI Overview](./overview.md) - General CLI information - [CLI Commands](./commands.md) - Core CLI commands -- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') - Operations API documentation -- [Operations API Reference](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'Operations reference') - Complete operations list +- [Operations API Overview](../operations-api/overview.md) - Operations API documentation +- [Operations API Reference](../operations-api/operations.md) - Complete operations list - [CLI Authentication](./authentication.md) - Authentication details diff --git a/reference_versioned_docs/version-v4/cli/overview.md b/reference_versioned_docs/version-v4/cli/overview.md index 467cdc67..5571becc 100644 --- a/reference_versioned_docs/version-v4/cli/overview.md +++ b/reference_versioned_docs/version-v4/cli/overview.md @@ -115,7 +115,7 @@ See [CLI Commands](./commands.md) for detailed documentation on each command. Added in: v4.3.0 -The Harper CLI supports executing most operations from the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') directly from the command line. 
This includes operations that do not require complex nested parameters. +The Harper CLI supports executing most operations from the [Operations API](../operations-api/overview.md) directly from the command line. This includes operations that do not require complex nested parameters. **Syntax**: `harper =` @@ -196,5 +196,5 @@ See [CLI Commands](./commands.md) for detailed information on `harper dev` and o - [CLI Commands](./commands.md) - Detailed reference for each CLI command - [Operations API Commands](./operations-api-commands.md) - Operations available through CLI - [CLI Authentication](./authentication.md) - Authentication mechanisms -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview') - Harper configuration options -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API') - Full operations API reference +- [Configuration](../configuration/overview.md) - Harper configuration options +- [Operations API](../operations-api/overview.md) - Full operations API reference diff --git a/reference_versioned_docs/version-v4/components/applications.md b/reference_versioned_docs/version-v4/components/applications.md index fb0185a4..5df64215 100644 --- a/reference_versioned_docs/version-v4/components/applications.md +++ b/reference_versioned_docs/version-v4/components/applications.md @@ -112,7 +112,7 @@ HarperDB/application-template#semver:v1.0.0 Harper generates a `package.json` from component configurations and uses a form of `npm install` to resolve them. This is why specifying a local file path creates a symlink (changes are picked up between restarts without redeploying). -For SSH-based private repos, use the [Add SSH Key](#add-ssh-key) operation to register keys first. +For SSH-based private repos, use the [Add SSH Key](#add_ssh_key) operation to register keys first. 
## Dependency Management diff --git a/reference_versioned_docs/version-v4/components/extension-api.md b/reference_versioned_docs/version-v4/components/extension-api.md index 09fa8770..5d729cf6 100644 --- a/reference_versioned_docs/version-v4/components/extension-api.md +++ b/reference_versioned_docs/version-v4/components/extension-api.md @@ -141,7 +141,7 @@ Parameters: A Protocol Extension is a more advanced form of Resource Extension, primarily used for implementing higher-level protocols (e.g., building and running a Next.js project) or adding custom networking handlers. -Protocol Extensions use the [`server`](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP server global API') global API for custom networking. +Protocol Extensions use the [`server`](../http/api.md) global API for custom networking. ### Protocol Extension Configuration diff --git a/reference_versioned_docs/version-v4/components/overview.md b/reference_versioned_docs/version-v4/components/overview.md index 2dfe8d1d..d41b7475 100644 --- a/reference_versioned_docs/version-v4/components/overview.md +++ b/reference_versioned_docs/version-v4/components/overview.md @@ -166,5 +166,5 @@ Harper collects status from each component at load time and tracks any registere - [Applications](./applications.md) — Managing and deploying applications - [Extension API](./extension-api.md) — Building custom extensions - [Plugin API](./plugin-api.md) — Building plugins (experimental, recommended for new extensions) -- [TODO:reference_versioned_docs/version-v4/resources/resource-api.md](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference') — Resource class interface -- [TODO:reference_versioned_docs/version-v4/database/schema.md](TODO:reference_versioned_docs/version-v4/database/schema.md 'Schema definition') — Defining schemas with graphqlSchema +- [Resource API](../resources/resource-api.md) — Resource class interface +- [Database Schema](../database/schema.md) — 
Defining schemas with graphqlSchema diff --git a/reference_versioned_docs/version-v4/components/plugin-api.md b/reference_versioned_docs/version-v4/components/plugin-api.md index e91e2ded..c92cf839 100644 --- a/reference_versioned_docs/version-v4/components/plugin-api.md +++ b/reference_versioned_docs/version-v4/components/plugin-api.md @@ -120,7 +120,7 @@ The central object passed to `handleApplication()`. Provides access to configura - **`'error'`** — `error: unknown` — An error occurred - **`'ready'`** — Emitted when the Scope is ready after loading the config file -#### `scope.handleEntry([files][, handler])` +#### `scope.handleEntry([files][, handler])` {#scopehandleentry} Returns an [`EntryHandler`](#class-entryhandler) for watching and processing file system entries. @@ -159,11 +159,11 @@ Request a Harper restart. Does not restart immediately—indicates to the user t #### `scope.resources` -Returns: `Map` — Currently loaded [Resource](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API') instances. +Returns: `Map` — Currently loaded [Resource](../resources/resource-api.md) instances. #### `scope.server` -Returns: `server` — Reference to the [server](TODO:reference_versioned_docs/version-v4/http/api.md 'HTTP server global API') global API. Use for registering HTTP middleware, custom networking, etc. +Returns: `server` — Reference to the [server](../http/api.md) global API. Use for registering HTTP middleware, custom networking, etc. 
#### `scope.options` diff --git a/reference_versioned_docs/version-v4/configuration/operations.md b/reference_versioned_docs/version-v4/configuration/operations.md index 23361381..109bb8c5 100644 --- a/reference_versioned_docs/version-v4/configuration/operations.md +++ b/reference_versioned_docs/version-v4/configuration/operations.md @@ -16,7 +16,7 @@ For the full list of configurable options, see [Configuration Options](./options ## Set Configuration -Modifies one or more Harper configuration parameters. **Requires a [restart](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart 'restart operation') or [restart_service](TODO:reference_versioned_docs/version-v4/operations-api/operations.md#restart-service 'restart_service operation') to take effect.** +Modifies one or more Harper configuration parameters. **Requires a [restart](../operations-api/operations.md#restart) or [restart_service](../operations-api/operations.md#restart_service) to take effect.** `operation` _(required)_ — must be `set_configuration` diff --git a/reference_versioned_docs/version-v4/database/api.md b/reference_versioned_docs/version-v4/database/api.md index 93162ed6..bb8c225d 100644 --- a/reference_versioned_docs/version-v4/database/api.md +++ b/reference_versioned_docs/version-v4/database/api.md @@ -12,7 +12,7 @@ Harper exposes a set of global variables and functions that JavaScript code (in ## `tables` -`tables` is an object whose properties are the tables in the default database (`data`). Each table defined in your `schema.graphql` file is available as a property, and the value is the table class that implements the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md). +`tables` is an object whose properties are the tables in the default database (`data`). Each table defined in your `schema.graphql` file is available as a property, and the value is the table class that implements the [Resource API](../resources/resource-api.md). 
```graphql # schema.graphql @@ -49,7 +49,7 @@ for await (const record of Product.search(query)) { } ``` -For the full set of methods available on table classes, see the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md). +For the full set of methods available on table classes, see the [Resource API](../resources/resource-api.md). ## `databases` @@ -238,6 +238,6 @@ When a field is typed as `Blob` in the schema, any string or buffer assigned via ## Related Documentation - [Schema](./schema.md) — Defining tables and blob fields -- [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md) — Full table class method reference +- [Resource API](../resources/resource-api.md) — Full table class method reference - [Transaction Logging](./transaction.md) — Audit log and transaction log for data change history -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md) — Blob storage path configuration +- [Configuration](../configuration/options.md) — Blob storage path configuration diff --git a/reference_versioned_docs/version-v4/database/compaction.md b/reference_versioned_docs/version-v4/database/compaction.md index 152a9ab4..a4ede5f2 100644 --- a/reference_versioned_docs/version-v4/database/compaction.md +++ b/reference_versioned_docs/version-v4/database/compaction.md @@ -68,4 +68,4 @@ STORAGE_COMPACTONSTART=true STORAGE_COMPACTONSTARTKEEPBACKUP=true harperdb - [Storage Algorithm](./storage-algorithm.md) — How Harper stores data using LMDB - [CLI Commands](../cli/commands.md) — `copy-db` CLI command reference -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage section') — Full storage configuration options including compression settings +- [Configuration](../configuration/options.md) — Full storage configuration options including compression settings diff --git a/reference_versioned_docs/version-v4/database/data-loader.md 
b/reference_versioned_docs/version-v4/database/data-loader.md index 521f6153..962b3706 100644 --- a/reference_versioned_docs/version-v4/database/data-loader.md +++ b/reference_versioned_docs/version-v4/database/data-loader.md @@ -20,7 +20,7 @@ dataLoader: files: 'data/*.json' ``` -`dataLoader` is an [Extension](TODO:reference_versioned_docs/version-v4/components/extension-api.md 'Extension component API') and supports the standard `files` configuration option, including glob patterns. +`dataLoader` is an [Extension](../components/extension-api.md) and supports the standard `files` configuration option, including glob patterns. ## Data File Format @@ -213,4 +213,4 @@ Because the data loader uses content hashing, adding new countries or correcting - [Schema](./schema.md) — Defining table structure before loading data - [Jobs](./jobs.md) — Bulk data operations via the Operations API (CSV/JSON import from file, URL, or S3) -- [Components](TODO:reference_versioned_docs/version-v4/components/overview.md) — Extension and plugin system that the data loader is built on +- [Components](../components/overview.md) — Extension and plugin system that the data loader is built on diff --git a/reference_versioned_docs/version-v4/database/jobs.md b/reference_versioned_docs/version-v4/database/jobs.md index 5931746c..63fd8e0e 100644 --- a/reference_versioned_docs/version-v4/database/jobs.md +++ b/reference_versioned_docs/version-v4/database/jobs.md @@ -17,7 +17,7 @@ Job status values: ## Bulk Operations -The following operations create jobs. All bulk operations are sent to the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md). +The following operations create jobs. All bulk operations are sent to the [Operations API](../operations-api/overview.md). 
### CSV Data Load @@ -268,5 +268,5 @@ _Restricted to `super_user` roles._ ## Related Documentation - [Data Loader](./data-loader.md) — Component-based data loading as part of deployment -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Sending operations to Harper +- [Operations API](../operations-api/overview.md) — Sending operations to Harper - [Transaction Logging](./transaction.md) — Recording a history of changes made to tables diff --git a/reference_versioned_docs/version-v4/database/overview.md b/reference_versioned_docs/version-v4/database/overview.md index af5b8471..8025d577 100644 --- a/reference_versioned_docs/version-v4/database/overview.md +++ b/reference_versioned_docs/version-v4/database/overview.md @@ -31,7 +31,7 @@ The most common way to use Harper's database is through the **schema system**. B You do not need to build custom application code to use the database. A schema definition alone is enough to create fully functional, queryable REST endpoints for your data. -For more advanced use cases, you can extend table behavior using the [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Custom resource logic layered on top of tables'). +For more advanced use cases, you can extend table behavior using the [Resource API](../resources/resource-api.md). 
### Architecture Overview @@ -118,6 +118,6 @@ For deeper coverage of each database feature, see the dedicated pages in this se ## Related Documentation - [REST](../rest/overview.md) — HTTP interface built on top of the database resource system -- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md) — Custom application logic extending database tables -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Direct database management operations (create/drop databases and tables, insert/update/delete records) -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md) — Storage configuration options (compression, blob paths, compaction) +- [Resources](../resources/overview.md) — Custom application logic extending database tables +- [Operations API](../operations-api/overview.md) — Direct database management operations (create/drop databases and tables, insert/update/delete records) +- [Configuration](../configuration/overview.md) — Storage configuration options (compression, blob paths, compaction) diff --git a/reference_versioned_docs/version-v4/database/schema.md b/reference_versioned_docs/version-v4/database/schema.md index 93cb5b7f..905aa1f9 100644 --- a/reference_versioned_docs/version-v4/database/schema.md +++ b/reference_versioned_docs/version-v4/database/schema.md @@ -477,7 +477,7 @@ When a table is created through the Operations API or Studio without a schema de Dynamic schema tables are additive — new attributes are added as new data arrives. Existing records will have `null` for any newly added attributes. -Use `create_attribute` and `drop_attribute` operations to manually manage attributes on dynamic schema tables. See the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md 'NoSQL and database operations') for details. +Use `create_attribute` and `drop_attribute` operations to manually manage attributes on dynamic schema tables. 
See the [Operations API](../operations-api/operations.md#databases--tables) for details. ## OpenAPI Specification @@ -498,6 +498,6 @@ Harper does **not** support renaming tables. Changing a type name in a schema de - [JavaScript API](./api.md) — `tables`, `databases`, `transaction()`, and `createBlob()` globals for working with schema-defined tables in code - [Data Loader](./data-loader.md) — Seed tables with initial data alongside schema deployment - [REST Querying](../rest/querying.md) — Querying tables via HTTP using schema-defined attributes and relationships -- [Resources](TODO:reference_versioned_docs/version-v4/resources/resource-api.md) — Extending table behavior with custom application logic +- [Resources](../resources/resource-api.md) — Extending table behavior with custom application logic - [Storage Algorithm](./storage-algorithm.md) — How Harper indexes and stores schema-defined data -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'graphqlSchema component and storage options') — Component configuration for schemas +- [Configuration](../configuration/options.md) — Component configuration for schemas diff --git a/reference_versioned_docs/version-v4/database/storage-algorithm.md b/reference_versioned_docs/version-v4/database/storage-algorithm.md index 346f2cb4..35af971b 100644 --- a/reference_versioned_docs/version-v4/database/storage-algorithm.md +++ b/reference_versioned_docs/version-v4/database/storage-algorithm.md @@ -44,7 +44,7 @@ Within the LMDB implementation, table records are grouped into a single LMDB env Changed in: v4.3.0 — Compression is now enabled by default for all records over 4KB -Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage configuration options'). 
Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). +Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](../configuration/options.md). Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). ## Performance Characteristics @@ -108,4 +108,4 @@ Indexes are ordered — booleans first, then numbers (numerically), then strings - [Schema](./schema.md) — Defining indexed attributes and vector indexes - [Compaction](./compaction.md) — Reclaiming free space and applying new storage configuration to existing databases -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/options.md 'storage section') — Storage configuration options (compression, memory maps, blob paths) +- [Configuration](../configuration/options.md) — Storage configuration options (compression, memory maps, blob paths) diff --git a/reference_versioned_docs/version-v4/database/system-tables.md b/reference_versioned_docs/version-v4/database/system-tables.md index 6b457e9f..683dfb6e 100644 --- a/reference_versioned_docs/version-v4/database/system-tables.md +++ b/reference_versioned_docs/version-v4/database/system-tables.md @@ -144,7 +144,7 @@ Can be queried to inspect the current replication topology: } ``` -Used by the `add_node`, `update_node`, and related clustering operations. See [Replication](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Clustering and node management') for details. +Used by the `add_node`, `update_node`, and related clustering operations. See [Replication](../replication/clustering.md) for details. ### `hdb_certificate` @@ -154,5 +154,5 @@ Stores TLS certificates used in replication. 
Can be queried to inspect the certi - [Analytics](../analytics/overview.md) — Full reference for analytics metrics tracked in `hdb_analytics` and `hdb_raw_analytics` - [Data Loader](./data-loader.md) — Component that writes to `hdb_dataloader_hash` -- [Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md) — Clustering and replication system that uses `hdb_nodes` and `hdb_certificate` -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Querying system tables using `search_by_conditions` +- [Replication](../replication/overview.md) — Clustering and replication system that uses `hdb_nodes` and `hdb_certificate` +- [Operations API](../operations-api/overview.md) — Querying system tables using `search_by_conditions` diff --git a/reference_versioned_docs/version-v4/database/transaction.md b/reference_versioned_docs/version-v4/database/transaction.md index d2a58968..3ae8847f 100644 --- a/reference_versioned_docs/version-v4/database/transaction.md +++ b/reference_versioned_docs/version-v4/database/transaction.md @@ -149,6 +149,6 @@ This overrides the [`logging.auditLog`](../logging/configuration.md) global conf ## Related Documentation - [Logging](../logging/overview.md) — Application and system logging (separate from transaction/audit logging) -- [Replication](TODO:reference_versioned_docs/version-v4/replication/overview.md) — Clustering setup required for transaction logs +- [Replication](../replication/overview.md) — Clustering setup required for transaction logs - [Logging Configuration](../logging/configuration.md) — Global audit log configuration (`logging.auditLog`) -- [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md) — Sending operations to Harper +- [Operations API](../operations-api/overview.md) — Sending operations to Harper diff --git a/reference_versioned_docs/version-v4/environment-variables/overview.md 
b/reference_versioned_docs/version-v4/environment-variables/overview.md index 2b66a486..0c72b83b 100644 --- a/reference_versioned_docs/version-v4/environment-variables/overview.md +++ b/reference_versioned_docs/version-v4/environment-variables/overview.md @@ -10,7 +10,7 @@ title: Environment Variables Harper supports loading environment variables in Harper applications `process.env` using the built-in `loadEnv` plugin. This is the standard way to supply secrets and configuration to your Harper components without hardcoding values. `loadEnv` does **not** need to be installed as it is built into Harper and only needs to be declared in your `config.yaml`. :::note -If you are looking for information on how to configure your Harper installation using environment variables, see [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration section overview, including environment variable configuration') section for more information. +If you are looking for information on how to configure your Harper installation using environment variables, see the [Configuration](../configuration/overview.md) section for more information. ::: ## Basic Usage @@ -73,5 +73,5 @@ Files are loaded in the order specified. 
## Related -- [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Extensions overview') -- [Configuration](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration section, including environment variable configuration conventions and HARPER_DEFAULT_CONFIG / HARPER_SET_CONFIG') +- [Components Overview](../components/overview.md) +- [Configuration](../configuration/overview.md) diff --git a/reference_versioned_docs/version-v4/fastify-routes/overview.md b/reference_versioned_docs/version-v4/fastify-routes/overview.md index 9ce54f8d..ddb5d3df 100644 --- a/reference_versioned_docs/version-v4/fastify-routes/overview.md +++ b/reference_versioned_docs/version-v4/fastify-routes/overview.md @@ -7,10 +7,10 @@ title: Define Fastify Routes # Define Fastify Routes :::note -Fastify routes are discouraged in favor of modern routing with [Custom Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources documentation'), but remain a supported feature for backwards compatibility and specific use cases. +Fastify routes are discouraged in favor of modern routing with [Custom Resources](../resources/overview.md), but remain a supported feature for backwards compatibility and specific use cases. ::: -Harper provides a build-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. While we generally recommend building your endpoints/APIs with Harper's [REST interface](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface documentation') for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. +Harper provides a built-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. 
While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest/overview.md) for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): diff --git a/reference_versioned_docs/version-v4/graphql-querying/overview.md b/reference_versioned_docs/version-v4/graphql-querying/overview.md index 8e4bbf6d..917f0e54 100644 --- a/reference_versioned_docs/version-v4/graphql-querying/overview.md +++ b/reference_versioned_docs/version-v4/graphql-querying/overview.md @@ -14,7 +14,7 @@ Added in: v4.4.0 (provisional) Changed in: v4.5.0 (disabled by default, configuration options) -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](TODO:reference_versioned_docs/version-v4/components/applications.md 'Schema definition documentation'), and for querying [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview'). +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../components/applications.md), and for querying [Resources](../resources/overview.md). Get started by setting `graphql: true` in `config.yaml`. This configuration option was added in v4.5.0 to allow more granular control over the GraphQL endpoint. @@ -56,7 +56,7 @@ Accept: application/graphql-response+json > Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. -The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. 
Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](TODO:reference_versioned_docs/version-v4/resources/overview.md#query 'Resource Query API') for more complex queries. +The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](../rest/querying.md) for more complex queries. Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: diff --git a/reference_versioned_docs/version-v4/http/api.md b/reference_versioned_docs/version-v4/http/api.md index 2690913e..bde865df 100644 --- a/reference_versioned_docs/version-v4/http/api.md +++ b/reference_versioned_docs/version-v4/http/api.md @@ -294,7 +294,7 @@ server.resources.getMatch('/NewResource/some-id', 'rest'); ## `server.operation(operation, context?, authorize?)` -Execute an [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') operation programmatically. +Execute an [Operations API](../operations-api/overview.md) operation programmatically. 
```ts server.operation(operation: object, context?: { username: string }, authorize?: boolean): Promise @@ -397,5 +397,5 @@ contentTypes.set('text/xml', { - [HTTP Overview](./overview) - [HTTP Configuration](./configuration) -- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') +- [REST Overview](../rest/overview.md) - [Security API](../security/api.md) diff --git a/reference_versioned_docs/version-v4/http/configuration.md b/reference_versioned_docs/version-v4/http/configuration.md index dc4e2b84..abc21a29 100644 --- a/reference_versioned_docs/version-v4/http/configuration.md +++ b/reference_versioned_docs/version-v4/http/configuration.md @@ -339,4 +339,4 @@ tls: - [HTTP API](./api) - [TLS Configuration](./tls) - [Security Overview](../security/overview.md) -- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full configuration reference') +- [Configuration Overview](../configuration/overview.md) diff --git a/reference_versioned_docs/version-v4/http/overview.md b/reference_versioned_docs/version-v4/http/overview.md index 86568c16..06858a8c 100644 --- a/reference_versioned_docs/version-v4/http/overview.md +++ b/reference_versioned_docs/version-v4/http/overview.md @@ -22,7 +22,7 @@ In previous versions: Session-affinity based socket delegation was used to route ## Request Handling -Harper uses a layered middleware chain for HTTP request processing. Components and applications can add handlers to this chain using the [`server.http()`](./api#serverhttp) API. Handlers are called in order; each handler can either process the request and return a `Response`, or pass it along to the next handler with `next(request)`. +Harper uses a layered middleware chain for HTTP request processing. Components and applications can add handlers to this chain using the [`server.http()`](./api#serverhttplistener-options) API. 
Handlers are called in order; each handler can either process the request and return a `Response`, or pass it along to the next handler with `next(request)`. Request and response objects follow the [WHATWG Fetch API](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) conventions (`Request` and `Response` classes), providing good composability for layered middleware and clean mapping to REST resource handlers. @@ -60,5 +60,5 @@ HTTP request logging is not enabled by default. To enable it, add an `http.loggi - [HTTP Configuration](./configuration) - [HTTP API](./api) -- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface overview') +- [REST Overview](../rest/overview.md) - [Security Overview](../security/overview.md) diff --git a/reference_versioned_docs/version-v4/http/tls.md b/reference_versioned_docs/version-v4/http/tls.md index 2fbaa4b3..7f9ff399 100644 --- a/reference_versioned_docs/version-v4/http/tls.md +++ b/reference_versioned_docs/version-v4/http/tls.md @@ -8,7 +8,7 @@ title: TLS Configuration Harper uses a top-level `tls` section in `harperdb-config.yaml` to configure Transport Layer Security. This configuration is shared by the HTTP server (HTTPS), the MQTT broker (secure MQTT), and any TLS socket servers created via the [HTTP API](./api#serversocketlistener-options). -The `operationsApi` section can optionally define its own `tls` block, which overrides the root `tls` for Operations API traffic only. See the [Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md 'Operations API configuration reference') for more details. +The `operationsApi` section can optionally define its own `tls` block, which overrides the root `tls` for Operations API traffic only. See the [Operations API Configuration](../configuration/operations.md) for more details. Harper must be restarted for TLS configuration changes to take effect. 
@@ -110,7 +110,7 @@ operationsApi: privateKey: ~/hdb/keys/ops-privateKey.pem ``` -See the [Operations API Configuration](TODO:reference_versioned_docs/version-v4/configuration/operations.md 'Operations API configuration reference') for more details. +See the [Operations API Configuration](../configuration/operations.md) for more details. ## Related diff --git a/reference_versioned_docs/version-v4/logging/api.md b/reference_versioned_docs/version-v4/logging/api.md index afcda71e..68083591 100644 --- a/reference_versioned_docs/version-v4/logging/api.md +++ b/reference_versioned_docs/version-v4/logging/api.md @@ -9,7 +9,7 @@ title: Logging API ## `logger` -The `logger` global is available in all JavaScript components without any imports. It writes structured log entries to the standard Harper log file (`hdb.log`) at the configured `logging.external` level and path. See [Logging Configuration](./configuration#logging-external) for per-component log configuration. +The `logger` global is available in all JavaScript components without any imports. It writes structured log entries to the standard Harper log file (`hdb.log`) at the configured `logging.external` level and path. See [Logging Configuration](./configuration#loggingexternal) for per-component log configuration. The `logger` global is a `MainLogger`. Calling `logger.withTag(tag)` returns a `TaggedLogger` scoped to that tag. 
diff --git a/reference_versioned_docs/version-v4/logging/configuration.md b/reference_versioned_docs/version-v4/logging/configuration.md index 659296fd..d32b0f28 100644 --- a/reference_versioned_docs/version-v4/logging/configuration.md +++ b/reference_versioned_docs/version-v4/logging/configuration.md @@ -367,4 +367,4 @@ http: - [Logging API](./api) - [Logging Operations](./operations) - [Database / Transaction Logging](../database/transaction.md) -- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') +- [Configuration Overview](../configuration/overview.md) diff --git a/reference_versioned_docs/version-v4/logging/operations.md b/reference_versioned_docs/version-v4/logging/operations.md index b4dbab5f..de149d62 100644 --- a/reference_versioned_docs/version-v4/logging/operations.md +++ b/reference_versioned_docs/version-v4/logging/operations.md @@ -88,4 +88,4 @@ _Restricted to super_user roles only._ - [Logging Overview](./overview) - [Logging Configuration](./configuration) - [Database / Transaction Logging](../database/transaction.md) -- [Operations API Overview](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') +- [Operations API Overview](../operations-api/overview.md) diff --git a/reference_versioned_docs/version-v4/mqtt/configuration.md b/reference_versioned_docs/version-v4/mqtt/configuration.md index 3053344f..93205ee9 100644 --- a/reference_versioned_docs/version-v4/mqtt/configuration.md +++ b/reference_versioned_docs/version-v4/mqtt/configuration.md @@ -228,4 +228,4 @@ tls: - [MQTT Overview](./overview) - [TLS Configuration](../http/tls.md) - [Security Overview](../security/overview.md) -- [Configuration Overview](TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Full harperdb-config.yaml reference') +- [Configuration Overview](../configuration/overview.md) diff --git 
a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md index 829730b7..3da53187 100644 --- a/reference_versioned_docs/version-v4/mqtt/overview.md +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -139,4 +139,4 @@ Available events: - [HTTP Overview](../http/overview.md) - [Security Overview](../security/overview.md) - [Database Schema](../database/schema.md) -- [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST interface — same path conventions as MQTT topics') +- [REST Overview](../rest/overview.md) diff --git a/reference_versioned_docs/version-v4/operations-api/operations.md b/reference_versioned_docs/version-v4/operations-api/operations.md index d6219815..690aeecd 100644 --- a/reference_versioned_docs/version-v4/operations-api/operations.md +++ b/reference_versioned_docs/version-v4/operations-api/operations.md @@ -390,7 +390,7 @@ Operations for executing SQL statements. Harper SQL is intended for data investigation and use cases where performance is not a priority. For production workloads, use NoSQL or REST operations. SQL performance optimizations are on the roadmap. ::: -Detailed documentation: [TODO:reference_versioned_docs/version-v4/legacy/sql 'Legacy SQL reference'] +Detailed documentation: [SQL Reference](../database/sql.md) | Operation | Description | Role Required | | --------- | ------------------------------------------------------------------ | ------------- | @@ -684,7 +684,7 @@ Resets and replaces the entire clustering configuration. Each entry follows the Operations for reading and updating Harper configuration. 
-Detailed documentation: [TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview'] +Detailed documentation: [Configuration Overview](../configuration/overview.md) | Operation | Description | Role Required | | ------------------- | ---------------------------------------------------------------- | ------------- | diff --git a/reference_versioned_docs/version-v4/operations-api/overview.md b/reference_versioned_docs/version-v4/operations-api/overview.md index 969d21ab..711d6ebb 100644 --- a/reference_versioned_docs/version-v4/operations-api/overview.md +++ b/reference_versioned_docs/version-v4/operations-api/overview.md @@ -16,7 +16,7 @@ All Operations API requests are sent as HTTP POST requests to the Operations API POST http://:9925/ ``` -See [TODO:reference_versioned_docs/version-v4/configuration/overview.md 'Configuration overview'] for how to change the port and other network settings (`operationsApi.network.port`, `operationsApi.network.securePort`). +See [Configuration Overview](../configuration/overview.md) for how to change the port and other network settings (`operationsApi.network.port`, `operationsApi.network.securePort`). ## Request Format diff --git a/reference_versioned_docs/version-v4/replication/overview.md b/reference_versioned_docs/version-v4/replication/overview.md index 3a5012f7..7847ecc9 100644 --- a/reference_versioned_docs/version-v4/replication/overview.md +++ b/reference_versioned_docs/version-v4/replication/overview.md @@ -45,7 +45,7 @@ replication: securePort: 9933 ``` -You can also manage nodes dynamically through the [Operations API](./clustering.md#operations-api) without editing the config file. +You can also manage nodes dynamically through the [Operations API](./clustering.md) without editing the config file. 
### Gossip Discovery @@ -80,7 +80,7 @@ Harper supports PKI-based security and authorization for replication connections - **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs). - **IP-based authentication** (for development/testing): Nodes are identified by IP address when using insecure connections. -Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to ensure revoked certificates cannot be used. OCSP and CRL work automatically with certificates from public CAs when `enableRootCAs` is enabled. For self-signed certificates or private CAs without OCSP/CRL support, use Harper's manual certificate revocation feature. Certificate verification settings follow the same configuration as HTTP mTLS connections (see [Certificate Verification](TODO:reference_versioned_docs/version-v4/security/certificate-verification.md 'HTTP mTLS certificate verification configuration')). +Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to ensure revoked certificates cannot be used. OCSP and CRL work automatically with certificates from public CAs when `enableRootCAs` is enabled. For self-signed certificates or private CAs without OCSP/CRL support, use Harper's manual certificate revocation feature. Certificate verification settings follow the same configuration as HTTP mTLS connections (see [Certificate Verification](../security/certificate-verification.md)). 
### Providing Your Own Certificates @@ -306,4 +306,4 @@ The `hdb_certificate` table contains the certificates used for replication conne - [Clustering Operations](./clustering.md) — Operations API for managing cluster nodes and subscriptions - [Sharding](./sharding.md) — Distributing data across a subset of nodes -- [Certificate Management](TODO:reference_versioned_docs/version-v4/security/certificate-management.md 'Dynamic certificate management for replication') +- [Certificate Management](../security/certificate-management.md) diff --git a/reference_versioned_docs/version-v4/resources/overview.md b/reference_versioned_docs/version-v4/resources/overview.md index c8fa7e41..09f47e5b 100644 --- a/reference_versioned_docs/version-v4/resources/overview.md +++ b/reference_versioned_docs/version-v4/resources/overview.md @@ -20,8 +20,8 @@ The Resource API is designed to mirror REST/HTTP semantics: methods map directly ## Relationship to Other Features - **Database tables** extend `Resource` automatically. You can use tables through the Resource API without writing any custom code. -- The **REST plugin** maps incoming HTTP requests to Resource methods. See [REST Overview](TODO:reference_versioned_docs/version-v4/rest/overview.md 'REST plugin reference'). -- The **MQTT plugin** routes publish/subscribe messages to `publish` and `subscribe` Resource methods. See [MQTT Overview](TODO:reference_versioned_docs/version-v4/mqtt/overview.md 'MQTT plugin reference'). +- The **REST plugin** maps incoming HTTP requests to Resource methods. See [REST Overview](../rest/overview.md). +- The **MQTT plugin** routes publish/subscribe messages to `publish` and `subscribe` Resource methods. See [MQTT Overview](../mqtt/overview.md). - **Global APIs** (`tables`, `databases`, `transaction`) provide access to resources from JavaScript code. - The **`jsResource` plugin** (configured in `config.yaml`) registers a JavaScript file's exported Resource classes as endpoints. 
@@ -48,7 +48,7 @@ type MyTable @table { } ``` -> For more info on the schema API see [`Database / Schema`]() +> For more info on the schema API see [`Database / Schema`](../database/schema.md) Then, in a `resources.js` extend from the `tables.MyTable` global: diff --git a/reference_versioned_docs/version-v4/resources/query-optimization.md b/reference_versioned_docs/version-v4/resources/query-optimization.md index 36a00675..6501565a 100644 --- a/reference_versioned_docs/version-v4/resources/query-optimization.md +++ b/reference_versioned_docs/version-v4/resources/query-optimization.md @@ -66,7 +66,7 @@ More unique values (higher cardinality) = more efficient indexed lookups. For ex ## Relationships and Joins -Harper supports relationship-based queries that join data across tables. See [Schema documentation](TODO:reference_versioned_docs/version-v4/database/schema.md 'Database schema section with relationship directives') for how to define relationships. +Harper supports relationship-based queries that join data across tables. See [Schema documentation](../database/schema.md) for how to define relationships. Join queries involve more lookups and naturally carry more overhead. The same indexing principles apply: diff --git a/reference_versioned_docs/version-v4/rest/server-sent-events.md b/reference_versioned_docs/version-v4/rest/server-sent-events.md index e7decfde..bdffaa1f 100644 --- a/reference_versioned_docs/version-v4/rest/server-sent-events.md +++ b/reference_versioned_docs/version-v4/rest/server-sent-events.md @@ -61,4 +61,4 @@ SSE is simpler to implement and has built-in reconnection in browsers. 
For scena - [WebSockets](./websockets.md) — Bidirectional real-time connections - [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation - [REST Overview](./overview.md) — HTTP methods and URL structure -- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview') — Custom resource API including `connect()` +- [Resources](../resources/overview.md) — Custom resource API including `connect()` diff --git a/reference_versioned_docs/version-v4/rest/websockets.md b/reference_versioned_docs/version-v4/rest/websockets.md index e38925b4..005b6795 100644 --- a/reference_versioned_docs/version-v4/rest/websockets.md +++ b/reference_versioned_docs/version-v4/rest/websockets.md @@ -36,7 +36,7 @@ By default, `new WebSocket('wss://server/my-resource/341')` accesses the resourc ## Custom `connect()` Handler -WebSocket behavior is driven by the `connect(incomingMessages)` method on a resource class. The method must return an async iterable (or generator) that produces messages to send to the client. For more on implementing custom resources, see [Resource API](TODO:reference_versioned_docs/version-v4/resources/resource-api.md 'Resource API reference'). +WebSocket behavior is driven by the `connect(incomingMessages)` method on a resource class. The method must return an async iterable (or generator) that produces messages to send to the client. For more on implementing custom resources, see [Resource API](../resources/resource-api.md). 
**Simple echo server**: @@ -103,4 +103,4 @@ In a scenario where messages arrive out-of-order across nodes: - [Server-Sent Events](./server-sent-events.md) — One-way real-time streaming - [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation - [REST Overview](./overview.md) — HTTP methods and URL structure -- [Resources](TODO:reference_versioned_docs/version-v4/resources/overview.md 'Resources overview') — Custom resource API including `connect()` +- [Resources](../resources/overview.md) — Custom resource API including `connect()` diff --git a/reference_versioned_docs/version-v4/security/certificate-management.md b/reference_versioned_docs/version-v4/security/certificate-management.md index 7beb1585..79f254b8 100644 --- a/reference_versioned_docs/version-v4/security/certificate-management.md +++ b/reference_versioned_docs/version-v4/security/certificate-management.md @@ -7,7 +7,7 @@ title: Certificate Management -This page covers certificate management for Harper's external-facing HTTP and Operations APIs. For replication certificate management, see [Replication Certificate Management](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering and certificate management'). +This page covers certificate management for Harper's external-facing HTTP and Operations APIs. For replication certificate management, see [Replication Certificate Management](../replication/clustering.md). ## Default Behavior diff --git a/reference_versioned_docs/version-v4/security/certificate-verification.md b/reference_versioned_docs/version-v4/security/certificate-verification.md index 7a719585..e2ee6ad2 100644 --- a/reference_versioned_docs/version-v4/security/certificate-verification.md +++ b/reference_versioned_docs/version-v4/security/certificate-verification.md @@ -446,4 +446,4 @@ replication: mTLS is always required for replication and cannot be disabled. 
This configuration only controls whether certificate revocation checking is performed. -For complete replication configuration, see [Replication Configuration](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering configuration'). +For complete replication configuration, see [Replication Configuration](../replication/clustering.md). diff --git a/reference_versioned_docs/version-v4/security/mtls-authentication.md b/reference_versioned_docs/version-v4/security/mtls-authentication.md index e194714d..d1f202e9 100644 --- a/reference_versioned_docs/version-v4/security/mtls-authentication.md +++ b/reference_versioned_docs/version-v4/security/mtls-authentication.md @@ -77,4 +77,4 @@ For help generating and managing certificates, see [Certificate Management](./ce ## Replication -mTLS is always required for Harper replication and cannot be disabled. For replication-specific mTLS configuration, see [Replication Configuration](TODO:reference_versioned_docs/version-v4/replication/clustering.md 'Replication clustering configuration'). +mTLS is always required for Harper replication and cannot be disabled. For replication-specific mTLS configuration, see [Replication Configuration](../replication/clustering.md). diff --git a/reference_versioned_docs/version-v4/security/overview.md b/reference_versioned_docs/version-v4/security/overview.md index c2ddd569..e6abc3d8 100644 --- a/reference_versioned_docs/version-v4/security/overview.md +++ b/reference_versioned_docs/version-v4/security/overview.md @@ -35,10 +35,10 @@ Harper supports three authentication methods: - CORS — Cross-Origin Resource Sharing. 
- For HTTP server configuration see [HTTP / Configuration / CORS](../http/configuration.md#cors) - - For Operations API configuration see [Operations API / Configuration / Network](TODO:reference_versioned_docs/version-v4/configuration/operations.md#network) + - For Operations API configuration see [Operations API / Configuration](../configuration/operations.md) - SSL & HTTPS — Enabling HTTPS and configuring TLS for the HTTP server. - For HTTP server configuration see [HTTP / Configuration / TLS](../http/tls.md) - - For Operations API configuration see [Operations API / Configuration / TLS](TODO:reference_versioned_docs/version-v4/configuration/operations.md#tls) + - For Operations API configuration see [Operations API / Configuration](../configuration/operations.md) - [Users and Roles](../users-and-roles/overview.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. ## API diff --git a/reference_versioned_docs/version-v4/static-files/overview.md b/reference_versioned_docs/version-v4/static-files/overview.md index 053a8f5d..2d0ea9f5 100644 --- a/reference_versioned_docs/version-v4/static-files/overview.md +++ b/reference_versioned_docs/version-v4/static-files/overview.md @@ -46,7 +46,7 @@ Files are accessed relative to the matched directory root, so `GET /index.html` Added in: v4.5 -`static` is a [Plugin](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview') and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. +`static` is a [Plugin](../components/overview.md) and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. Use `urlPath` to mount the files at a specific URL prefix: @@ -58,7 +58,7 @@ static: Now `GET /app/index.html` returns `site/index.html` and `GET /app/blog/post-1.html` returns `site/blog/post-1.html`. 
-See [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — files and urlPath options') for full `files` glob pattern and `urlPath` documentation. +See [Components Overview](../components/overview.md) for full `files` glob pattern and `urlPath` documentation. ## Additional Options @@ -171,4 +171,4 @@ A request to any unmatched path returns `static/index.html` with a `200` status ## Related -- [Components Overview](TODO:reference_versioned_docs/version-v4/components/overview.md 'Components, Applications, and Plugins overview — including files/urlPath options') +- [Components Overview](../components/overview.md) diff --git a/reference_versioned_docs/version-v4/studio/overview.md b/reference_versioned_docs/version-v4/studio/overview.md index c904413d..c6096e3e 100644 --- a/reference_versioned_docs/version-v4/studio/overview.md +++ b/reference_versioned_docs/version-v4/studio/overview.md @@ -17,14 +17,14 @@ If you're looking for the platform as a service interface, go to [Harper Fabric] ## Configuration -To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](TODO:reference_versioned_docs/version-v4/configuration/options.md#localstudio 'Configuration options'): +To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](../configuration/options.md#localstudio): ```yaml localStudio: enabled: true ``` -The local studio is provided by the [Operations API](TODO:reference_versioned_docs/version-v4/operations-api/overview.md 'Operations API overview') and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. +The local studio is provided by the [Operations API](../operations-api/overview.md) and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. 
## Accessing Local Studio From 13e1f53bb59bc49553bbf42ad7b8e7bd4f50cb36 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 15:32:21 -0600 Subject: [PATCH 34/51] Cross reference updates (#468) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs(cross-refs): update old /docs/ links in release notes to v4 reference paths Co-Authored-By: Claude Sonnet 4.6 * docs(cross-refs): update old /docs/ links in learn guides to v4 reference paths Co-Authored-By: Claude Sonnet 4.6 * docs(cross-refs): fix broken links and anchors from Docusaurus build - fabric/index.md: /docs/deployments/configuration#localstudio → /reference/v4/configuration/options#localstudio - fabric/grafana-integration.md: ../docs/developers/operations-api/analytics → /reference/v4/analytics/overview - release-notes/4.2.0: remove broken /docs/administration/cloning link (no target page exists) - release-notes/4.2.0: drop non-existent #adding-components-to-root anchor - release-notes/4.5.0 + index.mdx: #blob-storage → #blob-type (correct heading anchor) - src/pages/index.mdx: update 4 /docs/developers/* links to /reference/v4/ paths Co-Authored-By: Claude Sonnet 4.6 * update memory --------- Co-authored-by: Claude Sonnet 4.6 --- fabric/grafana-integration.md | 2 +- fabric/index.md | 2 +- .../developers/harper-applications-in-depth.mdx | 16 ++++++++-------- .../create-your-first-application.mdx | 4 ++-- .../install-and-connect-harper.mdx | 2 +- memory/MEMORY.md | 12 ++++++------ release-notes/v4-tucker/4.1.0.md | 8 ++++---- release-notes/v4-tucker/4.1.1.md | 4 ++-- release-notes/v4-tucker/4.2.0.md | 12 ++++++------ release-notes/v4-tucker/4.3.0.md | 12 ++++++------ release-notes/v4-tucker/4.4.0.md | 10 +++++----- release-notes/v4-tucker/4.5.0.md | 6 +++--- release-notes/v4-tucker/4.6.0.md | 2 +- release-notes/v4-tucker/index.mdx | 2 +- src/pages/index.mdx | 8 ++++---- 15 files changed, 51 insertions(+), 51 deletions(-) diff --git 
a/fabric/grafana-integration.md b/fabric/grafana-integration.md index e1843227..338cf21b 100644 --- a/fabric/grafana-integration.md +++ b/fabric/grafana-integration.md @@ -40,4 +40,4 @@ Once the Harper data source is configured, you can start building dashboards in 1. Click on the `Explore` navigation link in the left sidebar. 2. You can now create queries using the Harper data source to visualize your Harper Fabric cluster metrics and logs. - Reference the [Harper Analytics Operations](../docs/developers/operations-api/analytics) for more details on available metrics and query options. + Reference the [Harper Analytics Operations](/reference/v4/analytics/overview) for more details on available metrics and query options. diff --git a/fabric/index.md b/fabric/index.md index 5afd0c30..0f2c415d 100644 --- a/fabric/index.md +++ b/fabric/index.md @@ -8,7 +8,7 @@ Fabric Studio is the web-based GUI for Harper. Studio enables you to administer, [Sign up for free!](https://fabric.harper.fast/#/sign-up) -Harper includes a simplified local Studio that is packaged with all Harper installations and served directly from the cluster. It can be enabled in the [configuration file](/docs/deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://fabric.harper.fast/). +Harper includes a simplified local Studio that is packaged with all Harper installations and served directly from the cluster. It can be enabled in the [configuration file](/reference/v4/configuration/options#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://fabric.harper.fast/). 
--- diff --git a/learn/developers/harper-applications-in-depth.mdx b/learn/developers/harper-applications-in-depth.mdx index bdd6037a..e6595e1d 100644 --- a/learn/developers/harper-applications-in-depth.mdx +++ b/learn/developers/harper-applications-in-depth.mdx @@ -52,7 +52,7 @@ Beyond the component system, Harper also includes some other important subsystem Harper further classifies components (plugins and applications) as either built-in or custom. **Built-in** components are internal to Harper, require no additional installation steps, and are immediately accessible for use. The `graphqlSchema` and `rest` plugins are great examples of built-in plugins. **Custom** components are external to Harper, generally available as an npm package or git repository, and do require additional installation steps in order to be used. Custom components can be authored by anyone, including Harper. Any of Harper's official custom components are published using the `@harperdb` and `@harperfast` package scopes, such as the [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) plugin for developing Next.js applications or the [`@harperdb/status-check`](https://github.com/HarperDB/status-check) application. -Harper's reference documentation contains detailed documentation for all [built-in components](/docs/reference/components/built-in-extensions). Custom components are documented within their respective repositories. +Harper's reference documentation contains detailed documentation for all [built-in components](/reference/v4/components/overview#built-in-extensions-reference). Custom components are documented within their respective repositories. Harper does not currently include any built-in applications, making "custom applications" a bit redundant. Generally, we just refer to them as "applications". However, there is a multitude of both built-in and custom plugins, and so the documentation tends to specify whenever relevant. 
@@ -189,7 +189,7 @@ The other two properties are lists containing status objects corresponding to di The Operations API is mainly intended to be used for system administration purposes. This API runs on a separate port than the main application port serving user traffic, providing a distinct interface for clear differentiation between secure system administration and the application interface designed for high-load, performance and application defined actions. (It does have the ability to do data management, which may overlap with application capabilities, but this is part of a full system administration API). -Harper keeps a [reference of all operations](/docs/developers/operations-api) in the Operations API reference documentation, but here a few more you can try immediately: `user_info`, `read_log`, and `describe_all`. +Harper keeps a [reference of all operations](/reference/v4/operations-api/overview) in the Operations API reference documentation, but here are a few more you can try immediately: `user_info`, `read_log`, and `describe_all`. For `describe_all` to work, ensure that you are still running the Harper application you created in the previous guide. If you need to, checkout the [`02-rest-api`](https://github.com/HarperFast/create-your-first-application/tree/02-rest-api) branch of the `HarperFast/create-your-first-application` repository to ensure you have the necessary application files for this example. @@ -330,7 +330,7 @@ In previous guides we demonstrated how to use the `harper` and `harper dev` comm - `harper restart` will restart the main process and all threads (different than the thread-only restart from the `dev` command) - `harper status` displays the status of the process including the PID -There are a few more commands not listed here (check out the [CLI reference](/docs/deployments/harper-cli) if you're interested), and there is one more fun trick with the CLI. 
+There are a few more commands not listed here (check out the [CLI reference](/reference/v4/cli/overview) if you're interested), and there is one more fun trick with the CLI. Certain operations from the Operations API are available as CLI commands! They follow the convention: `harper =`, and return YAML by default. You can always pass `json=true` to see the result in JSON instead. @@ -355,7 +355,7 @@ Create a new file `resources.js` within your Harper application; here we are goi **Resources** are the mechanism for defining custom functionality in your Harper application. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. The corresponding Resource API is a unified API for modeling different data sources within Harper as JavaScript classes. Generally, this is where the core business logic of your application lives. Database tables (the ones defined by `graphqlSchema` entries) are `Resource` classes, and so extending the function of a table is as simple as extending their class. -Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). Furthermore, by simply `export` 'ing a resource class, Harper will generate REST API endpoints for it just like the `@export` directive did in `graphqlSchema`. The [Resource API](/docs/reference/resources) is quite powerful, and we'll dive into different aspects throughout future Learn guides, but for now lets start with a simple example extending the existing `Dog` table that already exists in the application. +Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). 
Furthermore, by simply `export` 'ing a resource class, Harper will generate REST API endpoints for it just like the `@export` directive did in `graphqlSchema`. The [Resource API](/reference/v4/resources/overview) is quite powerful, and we'll dive into different aspects throughout future Learn guides, but for now let's start with a simple example extending the existing `Dog` table that already exists in the application. Inside of `resources.js` add the following code for defining a `DogWithHumanAge` custom resource: @@ -625,7 +625,7 @@ At this point, you should confident to start tinkering with your own ideas for a ## Additional Resources -- [Operations API](/docs/developers/operations-api/) -- [`logger` global reference](/docs/reference/globals#logger) -- [Resources](/docs/reference/resources/) -- [Components](/docs/reference/components/) +- [Operations API](/reference/v4/operations-api/overview) +- [`logger` global reference](/reference/v4/logging/api) +- [Resources](/reference/v4/resources/overview) +- [Components](/reference/v4/components/overview) diff --git a/learn/getting-started/create-your-first-application.mdx b/learn/getting-started/create-your-first-application.mdx index 15698f9a..bcb96551 100644 --- a/learn/getting-started/create-your-first-application.mdx +++ b/learn/getting-started/create-your-first-application.mdx @@ -371,5 +371,5 @@ harper deploy \ ## Additional Resources -- [Table Schema](../../docs/developers/applications/defining-schemas) reference -- [REST](../../docs/developers/rest) reference +- [Table Schema](/reference/v4/database/schema) reference +- [REST](/reference/v4/rest/overview) reference diff --git a/learn/getting-started/install-and-connect-harper.mdx b/learn/getting-started/install-and-connect-harper.mdx index a30f9354..1213e437 100644 --- a/learn/getting-started/install-and-connect-harper.mdx +++ b/learn/getting-started/install-and-connect-harper.mdx @@ -214,5 +214,5 @@ If you see `HarperDB is running.`, fantastic work! 
You've successfully installed ## Additional Resources -- [Harper CLI](../../docs/deployments/harper-cli) reference documentation +- [Harper CLI](/reference/v4/cli/overview) reference documentation - [Harper Fabric](../../fabric/) documentation diff --git a/memory/MEMORY.md b/memory/MEMORY.md index e4dd2858..8574252a 100644 --- a/memory/MEMORY.md +++ b/memory/MEMORY.md @@ -42,15 +42,15 @@ All Phase 1A–1D sections are complete and merged: ## Next Steps -**Part 3 (Link Resolution) — Complete** on `link-resolution` branch (10 commits). Merge to `major-version-reorg` via PR review, then continue: +**Part 3 (Link Resolution) — Complete** on `link-resolution` branch. Merged to `major-version-reorg`. -**Part 4 (Cross-Reference Updates)** — Full plan in [`memory/part4-plan.md`](part4-plan.md). +**Part 4 (Cross-Reference Updates) — Complete** on `cross-reference-updates` branch (2 commits). -- Branch: `cross-reference-updates` off `major-version-reorg` -- Scope: ~7 release note files + 1 learn guide with old `/docs/` links -- **First step**: verify URL prefix for new reference pages (check `docusaurus.config.js`) +- URL prefix confirmed: `/reference/v4/` (not `/docs/v4/`) +- Updated 8 release note files (4.1.0–4.6.0 + index.mdx) and 3 learn guides +- Left `/docs/administration/cloning` as-is in 4.2.0.md (no learn guide exists yet) -**Part 5 (Redirects)** — Configure redirects from old paths (`/docs/reference/`, `/docs/developers/`, etc.) to new paths in `docusaurus.config.js`. +**Part 5 (Redirects)** — Configure redirects from old paths (`/docs/reference/`, `/docs/developers/`, `/docs/deployments/`, `/docs/administration/`) to new `/reference/v4/` paths in `docusaurus.config.js`. 
### Part 3 Key Decisions diff --git a/release-notes/v4-tucker/4.1.0.md b/release-notes/v4-tucker/4.1.0.md index bcce817d..a0db3e6b 100644 --- a/release-notes/v4-tucker/4.1.0.md +++ b/release-notes/v4-tucker/4.1.0.md @@ -8,17 +8,17 @@ HarperDB 4.1 introduces the ability to use worker threads for concurrently handl This means debugging will be much easier for custom functions. If you install/run HarperDB locally, most modern IDEs like WebStorm and VSCode support worker thread debugging, so you can start HarperDB in your IDE, and set breakpoints in your custom functions and debug them. -The associated routing functionality now includes session affinity support. This can be used to consistently route users to the same thread which can improve caching locality, performance, and fairness. This can be enabled in with the [`http.sessionAffinity` option in your configuration](/docs/4.1/configuration#session-affinity). +The associated routing functionality now includes session affinity support. This can be used to consistently route users to the same thread which can improve caching locality, performance, and fairness. This can be enabled with the [`http.sessionAffinity` option in your configuration](/reference/v4/configuration/options#http). HarperDB 4.1's NoSQL query handling has been revamped to consistently use iterators, which provide an extremely memory efficient mechanism for directly streaming query results to the network _as_ the query results are computed. This results in faster Time to First Byte (TTFB) (only the first record/value in a query needs to be computed before data can start to be sent), and less memory usage during querying (the entire query result does not need to be stored in memory). These iterators are also available in query results for custom functions and can provide means for custom function code to iteratively access data from the database without loading entire results. 
This should be a completely transparent upgrade, all HTTP APIs function the same, with the one exception that custom functions need to be aware that they can't access query results by `[index]` (they should use array methods or for-in loops to handle query results). -4.1 includes configuration options for specifying the location of database storage files. This allows you to specifically locate database directories and files on different volumes for better flexibility and utilization of disks and storage volumes. See the [storage configuration](/docs/4.1/configuration#storage) and [schemas configuration](/docs/4.1/configuration#schemas) for information on how to configure these locations. +4.1 includes configuration options for specifying the location of database storage files. This allows you to specifically locate database directories and files on different volumes for better flexibility and utilization of disks and storage volumes. See the [storage configuration](/reference/v4/configuration/options#storage) and [schemas configuration](/reference/v4/database/schema) for information on how to configure these locations. -Logging has been revamped and condensed into one `hdb.log` file. See [logging](/docs/administration/logging/) for more information. +Logging has been revamped and condensed into one `hdb.log` file. See [logging](/reference/v4/logging/overview) for more information. A new operation called `cluster_network` was added, this operation will ping the cluster and return a list of enmeshed nodes. -Custom Functions will no longer automatically load static file routes, instead the `@fastify/static` plugin will need to be registered with the Custom Function server. See [Host A Static Web UI-static](/docs/4.1/custom-functions/host-static). +Custom Functions will no longer automatically load static file routes, instead the `@fastify/static` plugin will need to be registered with the Custom Function server. 
See [Host A Static Web UI](/reference/v4/legacy/custom-functions). Updates to S3 import and export mean that these operations now require the bucket `region` in the request. Also, if referencing a nested object it should be done in the `key` parameter. See examples [here](https://api.harperdb.io/#aa74bbdf-668c-4536-80f1-b91bb13e5024). diff --git a/release-notes/v4-tucker/4.1.1.md b/release-notes/v4-tucker/4.1.1.md index e90c55c6..719fc65d 100644 --- a/release-notes/v4-tucker/4.1.1.md +++ b/release-notes/v4-tucker/4.1.1.md @@ -6,8 +6,8 @@ title: 4.1.1 06/16/2023 -- HarperDB uses improved logic for determining default heap limits and thread counts. When running in a restricted container and on NodeJS 18.15+, HarperDB will use the constrained memory limit to determine heap limits for each thread. In more memory constrained servers with many CPU cores, a reduced default thread count will be used to ensure that excessive memory is not used by many workers. You may still define your own thread count (with `http`/`threads`) in the [configuration](/docs/deployments/configuration). -- An option has been added for [disabling the republishing NATS messages](/docs/deployments/configuration), which can provide improved replication performance in a fully connected network. +- HarperDB uses improved logic for determining default heap limits and thread counts. When running in a restricted container and on NodeJS 18.15+, HarperDB will use the constrained memory limit to determine heap limits for each thread. In more memory constrained servers with many CPU cores, a reduced default thread count will be used to ensure that excessive memory is not used by many workers. You may still define your own thread count (with `http`/`threads`) in the [configuration](/reference/v4/configuration/overview). 
+- An option has been added for [disabling the republishing NATS messages](/reference/v4/configuration/overview), which can provide improved replication performance in a fully connected network. - Improvements to our OpenShift container. - Dependency security updates. - **Bug Fixes** diff --git a/release-notes/v4-tucker/4.2.0.md b/release-notes/v4-tucker/4.2.0.md index d59172bd..071796d4 100644 --- a/release-notes/v4-tucker/4.2.0.md +++ b/release-notes/v4-tucker/4.2.0.md @@ -12,15 +12,15 @@ HarperDB 4.2 introduces a new interface to accessing our core database engine wi ### Resource API -The [Resource API](/docs/reference/resources) is the new interface for accessing data in HarperDB. It utilizes a uniform interface for accessing data in HarperDB database/tables and is designed to easily be implemented or extended for defining customized application logic for table access or defining custom external data sources. This API has support for connecting resources together for caching and delivering data change and message notifications in real-time. The [Resource API documentation details this interface](/docs/reference/resources). +The [Resource API](/reference/v4/resources/overview) is the new interface for accessing data in HarperDB. It utilizes a uniform interface for accessing data in HarperDB database/tables and is designed to easily be implemented or extended for defining customized application logic for table access or defining custom external data sources. This API has support for connecting resources together for caching and delivering data change and message notifications in real-time. The [Resource API documentation details this interface](/reference/v4/resources/overview). ### Component Architecture -HarperDB's custom functions have evolved towards a full component architecture; our internal functionality is defined as components, and this can be used in a modular way in conjunction with user components. 
These can all easily be configured and loaded through configuration files, and there is now a [well-defined interface for creating your own components](/docs/reference/components/extensions). Components can easily be deployed/installed into HarperDB using [NPM and Github references as well](/docs/reference/components/applications?_highlight=github#adding-components-to-root). +HarperDB's custom functions have evolved towards a full component architecture; our internal functionality is defined as components, and this can be used in a modular way in conjunction with user components. These can all easily be configured and loaded through configuration files, and there is now a [well-defined interface for creating your own components](/reference/v4/components/extension-api). Components can easily be deployed/installed into HarperDB using [NPM and Github references as well](/reference/v4/components/applications). ### Configurable Database Schemas -HarperDB applications or components support [schema definitions using GraphQL schema syntax](/docs/developers/applications/defining-schemas). This makes it easy to define your table and attribute structure and gives you control over which attributes should be indexed and what types they should be. With schemas in configuration, these schemas can be bundled with an application and deployed together with application code. +HarperDB applications or components support [schema definitions using GraphQL schema syntax](/reference/v4/database/schema). This makes it easy to define your table and attribute structure and gives you control over which attributes should be indexed and what types they should be. With schemas in configuration, these schemas can be bundled with an application and deployed together with application code. 
### REST Interface @@ -28,7 +28,7 @@ HarperDB 4.2 introduces a new REST interface for accessing data through best-pra ### Real-Time -HarperDB 4.2 now provides standard interfaces for subscribing to data changes and receiving notifications of changes and messages in real-time. Using these new real-time messaging capabilities with structured data provides a powerful integrated platform for both database style data updates and querying along with message delivery. [Real-time messaging](/docs/developers/real-time) of data is available through several protocols: +HarperDB 4.2 now provides standard interfaces for subscribing to data changes and receiving notifications of changes and messages in real-time. Using these new real-time messaging capabilities with structured data provides a powerful integrated platform for both database style data updates and querying along with message delivery. [Real-time messaging](/reference/v4/rest/websockets) of data is available through several protocols: #### MQTT @@ -50,7 +50,7 @@ Databases are now entirely encapsulated in a file, which means they can be moved ### Clone Node -HarperDB includes new functionality for adding new HarperDB nodes in a cluster. New instances can be configured to clone from a leader node, performing and copying a database snapshot from a leader node, and self-configuring from the leader node as well, to facilitate accelerated deployment of new nodes for fast horizontal scaling to meet demand needs. [See the documentation on Clone Node for more information.](/docs/administration/cloning) +HarperDB includes new functionality for adding new HarperDB nodes in a cluster. New instances can be configured to clone from a leader node, performing and copying a database snapshot from a leader node, and self-configuring from the leader node as well, to facilitate accelerated deployment of new nodes for fast horizontal scaling to meet demand needs. See the replication documentation for more information on node management. 
### Operations API terminology updates @@ -62,7 +62,7 @@ Support was added for defining a table with `primary_key` instead of `hash_attri There have been significant changes to `harperdb-config.yaml`, however none of these changes should affect pre-4.2 versions. If you upgrade to 4.2 any existing configuration should be backwards compatible and will not need to be updated. -`harperdb-config.yaml` has had some configuration values added, removed, renamed and defaults changed. Please refer to [harperdb-config.yaml](/docs/deployments/configuration) for the most current configuration parameters. +`harperdb-config.yaml` has had some configuration values added, removed, renamed and defaults changed. Please refer to [harperdb-config.yaml](/reference/v4/configuration/overview) for the most current configuration parameters. - The `http` element has been expanded. - `compressionThreshold` was added. diff --git a/release-notes/v4-tucker/4.3.0.md b/release-notes/v4-tucker/4.3.0.md index 7f0332bb..4d7e82fb 100644 --- a/release-notes/v4-tucker/4.3.0.md +++ b/release-notes/v4-tucker/4.3.0.md @@ -41,7 +41,7 @@ HarperDB also now supports querying with a sort order. Multiple sort orders can /Product?brand.name=Microsoft&sort(price)&select(name,brand{name,size}) ``` -See the [schema definition documentation](/docs/developers/applications/defining-schemas) for more information on defining relationships, and the [REST documentation for more information on queries](/docs/developers/rest). +See the [schema definition documentation](/reference/v4/database/schema) for more information on defining relationships, and the [REST documentation for more information on queries](/reference/v4/rest/overview). #### OpenAPI Specification @@ -81,7 +81,7 @@ HarperDB has upgraded the local studio to match the same version that is offered #### mTLS Support -HarperDB now supports mTLS based authentication for HTTP, WebSockets, and MQTT. 
See the [configuration documentation for more information](/docs/deployments/configuration). +HarperDB now supports mTLS based authentication for HTTP, WebSockets, and MQTT. See the [configuration documentation for more information](/reference/v4/configuration/overview). #### Single-Level Wildcards @@ -95,11 +95,11 @@ HarperDB's MQTT now supports the retain handling flags for subscriptions that ar HarperDB now supports basic conflict-free data type (CRDT) updates that allow properties to be individually updated and merged when separate properties are updated on different threads or nodes. Individual property CRDT updates are automatically performed when you update individual properties through the resource API. Individual property CRDT updates are used when making `PATCH` requests through the REST API. -The CRDT functionality also supports explicit incrementation to merge multiple parallel incrementation requests with proper summing. See the [Resource API for more information](/docs/reference/resources). +The CRDT functionality also supports explicit incrementation to merge multiple parallel incrementation requests with proper summing. See the [Resource API for more information](/reference/v4/resources/overview). #### Configuration Improvements -The configuration has improved support for detecting port conflicts, handling paths for fastify routes, and now includes support for specifying a heap limit and TLS ciphers. See the [configuration documentation for more information](/docs/deployments/configuration). +The configuration has improved support for detecting port conflicts, handling paths for fastify routes, and now includes support for specifying a heap limit and TLS ciphers. See the [configuration documentation for more information](/reference/v4/configuration/overview). 
#### Balanced Audit Log Cleanup @@ -115,10 +115,10 @@ Significant improvements were made to handling of free-space to decrease free-sp #### Compact Database -In addition to storage improvements, HarperDB now includes functionality for [compacting a database](/docs/deployments/harper-cli) (while offline), which can be used to eliminate all free-space to reset any fragmentation. +In addition to storage improvements, HarperDB now includes functionality for [compacting a database](/reference/v4/cli/overview) (while offline), which can be used to eliminate all free-space to reset any fragmentation. #### Compression Compression is now enabled by default for all records over 4KB. -To learn more on how to configure compression visit [configuration](/docs/deployments/configuration). +To learn more on how to configure compression visit [configuration](/reference/v4/configuration/overview). diff --git a/release-notes/v4-tucker/4.4.0.md b/release-notes/v4-tucker/4.4.0.md index 8b6623be..06b5638b 100644 --- a/release-notes/v4-tucker/4.4.0.md +++ b/release-notes/v4-tucker/4.4.0.md @@ -10,13 +10,13 @@ title: 4.4.0 ### Native Replication -HarperDB has a completely [new native replication system](/docs/developers/replication/) which is faster, more efficient, secure, and reliable than the previous replication system. The new system (codenamed "Plexus") uses direct WebSocket connections between servers with highly optimized encoding and is driven by direct tracking audit/transaction log for efficient and flexible data transfer. This replication has improved resilience with the ability to reach consensus consistency when one node goes down through cross-node catch-up. Network connections can be performed over the existing operations API port or a separate port, for improved configurability. +HarperDB has a completely [new native replication system](/reference/v4/replication/overview) which is faster, more efficient, secure, and reliable than the previous replication system. 
The new system (codenamed "Plexus") uses direct WebSocket connections between servers with highly optimized encoding and is driven by direct tracking audit/transaction log for efficient and flexible data transfer. This replication has improved resilience with the ability to reach consensus consistency when one node goes down through cross-node catch-up. Network connections can be performed over the existing operations API port or a separate port, for improved configurability. The native replication system is much easier to configure, with multiple options for authentication and security, including PKI/mTLS security that is highly robust and easy to use in conjunction with existing PKI certificates. Replication can be configured through explicit subscriptions or for automated replication of all data in a database. With automated replication, gossiping is used to automatically discover and connect to other nodes in the cluster. #### Sharding -The new replication system also includes provisional support for [sharding](/docs/developers/replication/sharding). This sharding mechanism paves the way for greater scalability and performance, by allow data to be distributed across multiple nodes. +The new replication system also includes provisional support for [sharding](/reference/v4/replication/sharding). This sharding mechanism paves the way for greater scalability and performance, by allowing data to be distributed across multiple nodes. #### Replicated Operations @@ -24,15 +24,15 @@ Certain operations can now be replicated across the cluster, including the deplo ### Computed Properties -Computed properties allow applications to define properties that are computed from other properties, allowing for composite properties that are calculated from other data stored in records without requiring actual storage of the computed value. For example, you could have a computed property for a full name based on first and last, or age/duration based on a date.
Computed properties are also foundational for custom indexes. See the [schema documentation](/docs/developers/applications/defining-schemas), [Resource API](/docs/reference/resources), and our blog post on [computed properties](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) for more information. +Computed properties allow applications to define properties that are computed from other properties, allowing for composite properties that are calculated from other data stored in records without requiring actual storage of the computed value. For example, you could have a computed property for a full name based on first and last, or age/duration based on a date. Computed properties are also foundational for custom indexes. See the [schema documentation](/reference/v4/database/schema), [Resource API](/reference/v4/resources/overview), and our blog post on [computed properties](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) for more information. ### Custom Indexing -Custom indexes can now be defined using computed properties to allow for unlimited possibilities of indexing, including composite, full-text indexing, vector indexing. Again, see the [schema documentation](/docs/developers/applications/defining-schemas) for more information. +Custom indexes can now be defined using computed properties to allow for unlimited possibilities of indexing, including composite, full-text indexing, and vector indexing. Again, see the [schema documentation](/reference/v4/database/schema) for more information. ### Native Graph Support -HarperDB now includes provisional support for native [GraphQL querying functionality](/docs/reference/graphql). This allows for querying of graph data using GraphQL syntax. This is provisional and some APIs may be updated in the future.
+HarperDB now includes provisional support for native [GraphQL querying functionality](/reference/v4/graphql-querying/overview). This allows for querying of graph data using GraphQL syntax. This is provisional and some APIs may be updated in the future. ### Dynamic Certificate Management diff --git a/release-notes/v4-tucker/4.5.0.md b/release-notes/v4-tucker/4.5.0.md index d5395603..d7635244 100644 --- a/release-notes/v4-tucker/4.5.0.md +++ b/release-notes/v4-tucker/4.5.0.md @@ -10,7 +10,7 @@ title: 4.5.0 ### Blob Storage -4.5 introduces a new [Blob storage system](/docs/reference/blob), that is designed to efficiently handle large binary objects, with built-in support for streaming large content/media in and out of storage. This provides significantly better performance and functionality for large unstructured data, such as HTML, images, video, and other large files. Components can leverage this functionality through the JavaScript `Blob` interface, and the new `createBlob` function. Blobs are fully replicated and integrated. Harper can also coerce strings to `Blob`s (when dictated by the field type), making it feasible to use blobs for large string data, including with MQTT messaging. +4.5 introduces a new [Blob storage system](/reference/v4/database/schema#blob-type), that is designed to efficiently handle large binary objects, with built-in support for streaming large content/media in and out of storage. This provides significantly better performance and functionality for large unstructured data, such as HTML, images, video, and other large files. Components can leverage this functionality through the JavaScript `Blob` interface, and the new `createBlob` function. Blobs are fully replicated and integrated. Harper can also coerce strings to `Blob`s (when dictated by the field type), making it feasible to use blobs for large string data, including with MQTT messaging. 
### Password Hashing Upgrade @@ -53,11 +53,11 @@ There is a new `loadEnv` component loader that can be used to load environmental ### Cluster Status Information -The [`cluster_status` operation](/docs/developers/operations-api/clustering) now includes new statistics for replication, including the timestamps of last received transactions, sent transactions, and committed transactions. +The [`cluster_status` operation](/reference/v4/replication/clustering) now includes new statistics for replication, including the timestamps of last received transactions, sent transactions, and committed transactions. ### Improved URL path parsing -Resources can be defined with nested paths and directly accessed by the exact path without requiring a trailing slash. The `id.property` syntax for accessing properties in URLs will only be applied to properties that are declared in a schema. This allows for URLs to generally include dots in paths without being interpreted as property access. A new [`directURLMapping` option/flag](/docs/deployments/configuration) on resources that allows for more direct URL path handling as well. +Resources can be defined with nested paths and directly accessed by the exact path without requiring a trailing slash. The `id.property` syntax for accessing properties in URLs will only be applied to properties that are declared in a schema. This allows for URLs to generally include dots in paths without being interpreted as property access. A new [`directURLMapping` option/flag](/reference/v4/configuration/overview) on resources allows for more direct URL path handling as well.
### `server.authenticateUser` API diff --git a/release-notes/v4-tucker/4.6.0.md b/release-notes/v4-tucker/4.6.0.md index 3188224c..bc5fc710 100644 --- a/release-notes/v4-tucker/4.6.0.md +++ b/release-notes/v4-tucker/4.6.0.md @@ -25,7 +25,7 @@ An important change is that logging to standard out/error will _not_ include the ### Data Loader -4.6 includes a new [data loader](/docs/developers/applications/data-loader) that can be used to load data into HarperDB as part of a component. The data loader can be used to load data from JSON file and can be deployed and distributed with a component to provide a reliable mechanism for ensuring specific records are loaded into Harper. +4.6 includes a new [data loader](/reference/v4/database/data-loader) that can be used to load data into HarperDB as part of a component. The data loader can be used to load data from JSON files and can be deployed and distributed with a component to provide a reliable mechanism for ensuring specific records are loaded into Harper. ### Resource API Upgrades diff --git a/release-notes/v4-tucker/index.mdx b/release-notes/v4-tucker/index.mdx index adedc99b..ec62ccd1 100644 --- a/release-notes/v4-tucker/index.mdx +++ b/release-notes/v4-tucker/index.mdx @@ -25,7 +25,7 @@ HarperDB version 4 ([Tucker release](v4-tucker/tucker)) represents major step fo ## -- Blob Storage - 4.5 introduces a new [Blob storage system](/docs/reference/blob). +- Blob Storage - 4.5 introduces a new [Blob storage system](/reference/v4/database/schema#blob-type). - Password Hashing Upgrade - two new password hashing algorithms for better security (to replace md5).
- New resource and storage Analytics diff --git a/src/pages/index.mdx b/src/pages/index.mdx index 1e975307..b18ffad3 100644 --- a/src/pages/index.mdx +++ b/src/pages/index.mdx @@ -28,25 +28,25 @@ The best way to get started using Harper is to head over to the [Learn](/learn/) items={[ { type: 'link', - href: '/docs/developers/applications/', + href: '/reference/v4/components/overview', label: 'Harper Applications', description: 'Build your a fully featured Harper Component with custom functionality', }, { type: 'link', - href: '/docs/developers/rest', + href: '/reference/v4/rest/overview', label: 'REST Queries', description: 'The recommended HTTP interface for data access, querying, and manipulation', }, { type: 'link', - href: '/docs/developers/operations-api/', + href: '/reference/v4/operations-api/overview', label: 'Operations API', description: 'Configure, deploy, administer, and control your Harper instance', }, { type: 'link', - href: '/docs/developers/replication/', + href: '/reference/v4/replication/overview', label: 'Clustering & Replication', description: 'The process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns.', From 99bf4d819d64604c8ebbda49153ca147f29ac96c Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 16:10:33 -0600 Subject: [PATCH 35/51] checkpoint before deleting old content files --- docusaurus.config.ts | 18 +- memory/MEMORY.md | 2 +- memory/part4-plan.md | 143 --------- memory/part5-redirects.md | 214 ++++++++++++++ redirects.ts | 589 +++++++++++++++----------------------- 5 files changed, 457 insertions(+), 509 deletions(-) delete mode 100644 memory/part4-plan.md create mode 100644 memory/part5-redirects.md diff --git a/docusaurus.config.ts b/docusaurus.config.ts index bd504c5a..464af95d 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -1,7 +1,7 @@ import { themes as prismThemes } from 'prism-react-renderer'; import 
type { Config } from '@docusaurus/types'; import type * as Preset from '@docusaurus/preset-classic'; -import { generateRedirects, createRedirects as createRedirectsBase } from './redirects'; +import { redirects, createRedirects } from './redirects'; // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) @@ -215,14 +215,14 @@ const config: Config = { }, ], - // // Redirects - // [ - // '@docusaurus/plugin-client-redirects', - // { - // redirects: generateRedirects(routeBasePath), - // createRedirects: (existingPath: string) => createRedirectsBase(existingPath, routeBasePath), - // }, - // ], + // Redirects + [ + '@docusaurus/plugin-client-redirects', + { + redirects, + createRedirects, + }, + ], // Sitemap [ diff --git a/memory/MEMORY.md b/memory/MEMORY.md index 8574252a..76043bf2 100644 --- a/memory/MEMORY.md +++ b/memory/MEMORY.md @@ -50,7 +50,7 @@ All Phase 1A–1D sections are complete and merged: - Updated 8 release note files (4.1.0–4.6.0 + index.mdx) and 3 learn guides - Left `/docs/administration/cloning` as-is in 4.2.0.md (no learn guide exists yet) -**Part 5 (Redirects)** — Configure redirects from old paths (`/docs/reference/`, `/docs/developers/`, `/docs/deployments/`, `/docs/administration/`) to new `/reference/v4/` paths in `docusaurus.config.js`. +**Part 5 (Redirects)** — Complete. `redirects.ts` rewritten from analytics data, plugin uncommented in `docusaurus.config.ts`. See `memory/part5-redirects.md` for details. Needs build verification + review of LOW TRAFFIC (<10 view) paths. ### Part 3 Key Decisions diff --git a/memory/part4-plan.md b/memory/part4-plan.md deleted file mode 100644 index 1d243338..00000000 --- a/memory/part4-plan.md +++ /dev/null @@ -1,143 +0,0 @@ -# Part 4: Cross-Reference Updates — Plan & Procedure - -## Overview - -Update links in `release-notes/` and `learn/` that point to old doc paths, mapping them to the new `reference_versioned_docs/version-v4/` structure. 
- -**Branch**: Create a new branch `cross-reference-updates` off `major-version-reorg` (after `link-resolution` is merged). - -**Commit strategy**: One commit per file group (release notes in one commit, learn guides in another, or broken down further if large). - ---- - -## Scope of Changes - -### Release Notes (`release-notes/v4-tucker/`) - -171 files total. Only ~7 files have `/docs/` links that need updating. The full list of unique links found (grep: `(/docs/[^)"\ ]*)` across all `release-notes/v4-tucker/*.md`): - -| Old Path | New Path | Notes | -| ------------------------------------------------------------------------------------- | ------------------------------------------------------------ | --------------------------------------------------- | -| `/docs/deployments/configuration` | `/docs/v4/configuration/overview` | 7 occurrences | -| `/docs/reference/resources` | `/docs/v4/resources/overview` | 4 occurrences | -| `/docs/developers/applications/defining-schemas` | `/docs/v4/database/schema` | 4 occurrences | -| `/docs/reference/graphql` | `/docs/v4/graphql-querying/overview` | 1 occurrence | -| `/docs/reference/components/extensions` | `/docs/v4/components/extension-api` | 1 occurrence | -| `/docs/reference/components/applications?_highlight=github#adding-components-to-root` | `/docs/v4/components/applications#adding-components-to-root` | 1 occurrence | -| `/docs/reference/blob` | `/docs/v4/database/schema#blob-storage` | 1 occurrence | -| `/docs/developers/rest` | `/docs/v4/rest/overview` | 1 occurrence | -| `/docs/developers/replication/sharding` | `/docs/v4/replication/sharding` | 1 occurrence | -| `/docs/developers/replication/` | `/docs/v4/replication/overview` | 1 occurrence | -| `/docs/developers/real-time` | `/docs/v4/rest/websockets` | 1 occurrence (real-time = websockets+SSE+MQTT) | -| `/docs/developers/operations-api/clustering` | `/docs/v4/replication/clustering` | 1 occurrence | -| `/docs/developers/applications/data-loader` | 
`/docs/v4/database/data-loader` | 1 occurrence | -| `/docs/deployments/harper-cli` | `/docs/v4/cli/overview` | 1 occurrence | -| `/docs/administration/logging/` | `/docs/v4/logging/overview` | 1 occurrence | -| `/docs/administration/cloning` | N/A — learn guide (not in reference) | Leave or link to learn guide if exists | -| `/docs/4.1/custom-functions/host-static` | `/docs/v4/legacy/custom-functions` | Legacy redirect | -| `/docs/4.1/configuration#storage` | `/docs/v4/configuration/options#storage` | 1 occurrence | -| `/docs/4.1/configuration#session-affinity` | `/docs/v4/configuration/options#http` | 1 occurrence (http section covers session affinity) | -| `/docs/4.1/configuration#schemas` | `/docs/v4/database/schema` | 1 occurrence | - -> **NOTE**: The exact URL prefix for the new structure (`/docs/v4/`) needs to be verified. Check `docusaurus.config.js` or `reference_versioned_sidebars/version-v4-sidebars.json` for the versioned path prefix. It may be `/docs/v4/` or `/reference/v4/` or similar. 
- -**Files that contain links (to edit):** - -- `release-notes/v4-tucker/4.1.0.md` — `/docs/4.1/configuration#*` and `/docs/4.1/custom-functions/*` -- `release-notes/v4-tucker/4.2.0.md` — `/docs/reference/resources`, `/docs/reference/components/*` -- `release-notes/v4-tucker/4.3.0.md` — `/docs/reference/resources` -- `release-notes/v4-tucker/4.4.0.md` — `/docs/developers/applications/defining-schemas`, `/docs/reference/resources`, `/docs/reference/graphql` -- `release-notes/v4-tucker/4.5.0.md` — `/docs/reference/blob`, `/docs/deployments/configuration` - -**To find all affected files precisely**: `grep -rl "/docs/" release-notes/v4-tucker/` - ---- - -### Learn Guides (`learn/`) - -Only 4 content files currently exist (most are stubs): - -- `learn/developers/harper-applications-in-depth.mdx` -- `learn/getting-started/create-your-first-application.mdx` -- `learn/getting-started/install-and-connect-harper.mdx` -- `learn/index.mdx` - -Links found in `harper-applications-in-depth.mdx`: - -| Old Path | New Path | -| ------------------------------------------------ | ------------------------------------------------------------ | -| `/docs/reference/components/built-in-extensions` | `/docs/v4/components/overview#built-in-extensions-reference` | -| `/docs/reference/resources` | `/docs/v4/resources/overview` | -| `/docs/reference/globals#logger` | `/docs/v4/logging/api` | -| `/docs/reference/resources/` | `/docs/v4/resources/overview` | -| `/docs/reference/components/` | `/docs/v4/components/overview` | - ---- - -## Procedure - -### Step 1: Verify URL prefix - -Before editing any links, confirm what the new URL prefix is for `reference_versioned_docs/version-v4/`. Check: - -```bash -cat docusaurus.config.js | grep -A5 "reference_versioned" -# or -cat reference_versioned_sidebars/version-v4-sidebars.json | head -5 -``` - -The prefix is likely `/docs/v4/` but confirm before proceeding. 
- -### Step 2: Find all affected release note files - -```bash -grep -rl "/docs/" release-notes/v4-tucker/ -``` - -This gives the exact list of files to edit. - -### Step 3: Edit release notes - -For each affected file, replace old `/docs/` paths with new `/docs/v4/` paths per the mapping table above. - -### Step 4: Edit learn guides - -Read each of the 4 learn guide files, apply the mapping table above. - -### Step 5: Check for any remaining old-path links across the whole repo - -```bash -grep -rn "/docs/reference/" --include="*.md" --include="*.mdx" release-notes/ learn/ -grep -rn "/docs/developers/" --include="*.md" --include="*.mdx" release-notes/ learn/ -grep -rn "/docs/deployments/" --include="*.md" --include="*.mdx" release-notes/ learn/ -grep -rn "/docs/administration/" --include="*.md" --include="*.mdx" release-notes/ learn/ -grep -rn "/docs/4\." --include="*.md" --include="*.mdx" release-notes/ learn/ -``` - -### Step 6: Commit - -- Commit release notes changes: `docs(cross-refs): update old /docs/ links in release notes to v4 reference paths` -- Commit learn guide changes: `docs(cross-refs): update old /docs/ links in learn guides to v4 reference paths` - ---- - -## Key Uncertainties to Resolve - -1. **URL prefix** — Confirm whether new reference pages are served at `/docs/v4/`, `/reference/v4/`, or another prefix. **Critical before editing any links.** -2. **`/docs/administration/cloning`** — This was flagged in migration map as "move to Learn guide." If no learn guide exists yet, either leave as-is (broken link) or remove the link text. -3. **`/docs/developers/real-time`** — This page covered WebSockets, SSE, and MQTT. Best split into: WebSockets content → `rest/websockets`, MQTT content → `mqtt/overview`. In context of release notes, pick whichever is most relevant to the surrounding text. 
- ---- - -## Non-Goals for Part 4 - -- Do NOT edit `versioned_docs/` files -- Do NOT edit `reference_versioned_docs/` files (those were handled in Part 3) -- Do NOT update links in the v1/v2/v3 release notes (out of scope) -- Do NOT update links in other config files (docusaurus.config.js, sidebars, etc.) — that's Part 5 - ---- - -## After Part 4 - -Proceed to **Part 5: Redirects** — configure redirects from old `/docs/developers/`, `/docs/reference/`, etc. paths to the new `/docs/v4/` equivalents in `docusaurus.config.js` (or wherever redirects are configured). diff --git a/memory/part5-redirects.md b/memory/part5-redirects.md new file mode 100644 index 00000000..8eeb27d6 --- /dev/null +++ b/memory/part5-redirects.md @@ -0,0 +1,214 @@ +# Part 5: Redirects Work + +## Status: Implementation complete — needs build verification + human review of LOW TRAFFIC items + +## Overview + +Rewriting `redirects.ts` to handle migrations from old `/docs/` paths to new `/reference/v4/` paths. +The redirect plugin is currently commented out in `docusaurus.config.ts` (lines 218-225). + +**Key constraint:** No redirects needed for the new `/reference/` section itself. The `/learn/`, `/release-notes/`, and `/fabric/` sections need very few redirects (flag exceptions). 
+ +## The New URL Structure + +New reference paths live at `/reference/v4/[section]/[page]`: + +| Section | Key Pages | +|----------------------|---------------------------------------------------------------------------| +| analytics | overview, operations | +| cli | overview, commands, authentication, operations-api-commands | +| components | overview, applications, extension-api, javascript-environment, plugin-api | +| configuration | overview, options, operations | +| database | overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql | +| environment-variables| overview | +| fastify-routes | overview | +| graphql-querying | overview | +| http | overview, configuration, api, tls | +| legacy | cloud, custom-functions | +| logging | overview, configuration, api, operations | +| mqtt | overview, configuration | +| operations-api | overview, operations | +| replication | overview, clustering, sharding | +| resources | overview, resource-api, query-optimization | +| rest | overview, querying, headers, content-types, websockets, server-sent-events| +| security | overview, basic-authentication, jwt-authentication, mtls-authentication, certificate-management, certificate-verification, configuration, api | +| static-files | overview | +| studio | overview | +| users-and-roles | overview, configuration, operations | + +## Old Path Structure (v4.7) + +The old docs were at `/docs/` serving the latest (4.7) content: + +- `/docs/developers/applications/*` → Components (new path) +- `/docs/developers/operations-api/*` → Operations API + various sections +- `/docs/developers/security/*` → Security +- `/docs/developers/replication/*` → Replication +- `/docs/developers/real-time` → REST (websockets/SSE) +- `/docs/developers/rest` → REST +- `/docs/developers/clustering/*` → Replication/clustering +- `/docs/developers/components/*` → (old reference/components - different from apps) +- `/docs/deployments/configuration` → Configuration 
+- `/docs/deployments/harper-cli` → CLI +- `/docs/deployments/install-harper/*` → (install - no new reference page) +- `/docs/deployments/harper-cloud/*` → Legacy/cloud +- `/docs/deployments/upgrade-hdb-instance` → (no direct equivalent in new ref) +- `/docs/administration/harper-studio/*` → Studio +- `/docs/administration/logging/*` → Logging +- `/docs/administration/cloning` → Replication +- `/docs/administration/compact` → Database/compaction +- `/docs/administration/jobs` → Database/jobs +- `/docs/reference/*` → Old reference section (reference/analytics, reference/resources/*, etc.) +- `/docs/foundations/*` → learn/ (already handled) +- `/docs/getting-started/*` → learn/ (already handled) + +## Analytics: Top Paths Requiring New Redirects (views > 50) + +Paths from GA data (Oct 2025 – Feb 2026) that need redirects to `/reference/v4/`: + +### High Priority (>200 views) +- `/docs/developers/operations-api` (1028) → `/reference/v4/operations-api/overview` +- `/docs/developers/applications` (727) → `/reference/v4/components/overview` +- `/docs/reference/resources` (667) → `/reference/v4/resources/overview` +- `/docs/deployments/configuration` (608) → `/reference/v4/configuration/overview` +- `/docs/developers/rest` (547) → `/reference/v4/rest/overview` +- `/docs/deployments/harper-cli` (467) → `/reference/v4/cli/overview` +- `/docs/reference` (459) → `/reference/v4` (index) +- `/docs/developers/applications/defining-schemas` (455) → `/reference/v4/database/schema` +- `/docs/developers/operations-api/nosql-operations` (435) → `/reference/v4/operations-api/operations` +- `/docs/developers/applications/caching` (410) → `/reference/v4/resources/overview` (or resource-api) +- `/docs/developers/real-time` (407) → `/reference/v4/rest/websockets` (or rest/overview) +- `/docs/developers/operations-api/databases-and-tables` (385) → `/reference/v4/database/overview` +- `/docs/developers/operations-api/components` (356) → `/reference/v4/operations-api/operations` +- 
`/docs/deployments/install-harper` (343) → keep as-is (deploy content, not in new ref)
+- `/docs/developers/replication` (328) → `/reference/v4/replication/overview`
+
+### Medium Priority (roughly 50–200 views; a few entries fall just outside this band)
+- `/docs/developers/operations-api/advanced-json-sql-examples` (158) → `/reference/v4/operations-api/operations`
+- `/docs/developers/operations-api/bulk-operations` (158) → `/reference/v4/operations-api/operations`
+- `/docs/developers/applications/data-loader` (218) → `/reference/v4/database/data-loader`
+- `/docs/developers/operations-api/system-operations` (213) → `/reference/v4/operations-api/operations`
+- `/docs/reference/components/built-in-extensions` (204) → `/reference/v4/components/extension-api`
+- `/docs/developers/operations-api/configuration` (203) → `/reference/v4/configuration/operations`
+- `/docs/developers/applications/web-applications` (199) → `/reference/v4/components/applications`
+- `/docs/developers/operations-api/users-and-roles` (195) → `/reference/v4/users-and-roles/operations`
+- `/docs/developers/security` (183) → `/reference/v4/security/overview`
+- `/docs/reference/resources/instance-binding` (181) → `/reference/v4/resources/resource-api`
+- `/docs/developers/applications/debugging` (150) → `/reference/v4/components/overview`
+- `/docs/reference/components/plugins` (150) → `/reference/v4/components/plugin-api`
+- `/docs/developers/applications/define-routes` (144) → `/reference/v4/fastify-routes/overview`
+- `/docs/reference/analytics` (135) → `/reference/v4/analytics/overview`
+- `/docs/developers/replication/sharding` (133) → `/reference/v4/replication/sharding`
+- `/docs/developers/operations-api/logs` (132) → `/reference/v4/logging/operations`
+- `/docs/reference/dynamic-schema` (132) → `/reference/v4/database/schema`
+- `/docs/administration/harper-studio` (130) → `/reference/v4/studio/overview`
+- `/docs/reference/graphql` (109) → `/reference/v4/graphql-querying/overview`
+- `/docs/reference/resources/migration` (109) →
`/reference/v4/database/data-loader` +- `/docs/reference/data-types` (107) → `/reference/v4/database/schema` +- `/docs/reference/architecture` (105) → `/reference/v4` (no direct equiv - use index) +- `/docs/developers/operations-api/clustering-nats` (80) → `/reference/v4/replication/clustering` +- `/docs/developers/operations-api/token-authentication` (79) → `/reference/v4/security/jwt-authentication` +- `/docs/reference/transactions` (79) → `/reference/v4/database/transaction` +- `/docs/reference/limits` (78) → `/reference/v4/database/schema` (or overview) +- `/docs/developers/security/jwt-auth` (77) → `/reference/v4/security/jwt-authentication` +- `/docs/developers/security/certificate-management` (76) → `/reference/v4/security/certificate-management` +- `/docs/reference/blob` (76) → `/reference/v4/database/schema` +- `/docs/reference/components/configuration` (74) → `/reference/v4/components/overview` +- `/docs/developers/security/configuration` (98) → `/reference/v4/security/configuration` +- `/docs/developers/security/users-and-roles` (93) → `/reference/v4/users-and-roles/overview` +- `/docs/administration/cloning` (87) → `/reference/v4/replication/overview` +- `/docs/developers/operations-api/certificate-management` (114) → `/reference/v4/security/certificate-management` +- `/docs/developers/operations-api/custom-functions` (113) → `/reference/v4/legacy/custom-functions` +- `/docs/developers/operations-api/jobs` (113) → `/reference/v4/database/jobs` +- `/docs/developers/security/basic-auth` (83) → `/reference/v4/security/basic-authentication` +- `/docs/reference/globals` (277) → `/reference/v4/components/javascript-environment` +- `/docs/reference/components` (159) → `/reference/v4/components/overview` +- `/docs/reference/components/extensions` (102) → `/reference/v4/components/extension-api` +- `/docs/reference/components/applications` (121) → `/reference/v4/components/applications` +- `/docs/developers/applications/defining-roles` (119) → 
`/reference/v4/users-and-roles/overview` +- `/docs/developers/operations-api/sql-operations` (96) → `/reference/v4/database/sql` +- `/docs/administration/logging/standard-logging` (91) → `/reference/v4/logging/overview` +- `/docs/administration/logging` (68) → `/reference/v4/logging/overview` +- `/docs/reference/roles` (62) → `/reference/v4/users-and-roles/overview` +- `/docs/reference/storage-algorithm` (61) → `/reference/v4/database/storage-algorithm` +- `/docs/developers/sql-guide` (53) → `/reference/v4/database/sql` +- `/docs/developers/operations-api/registration` (59) → `/reference/v4/operations-api/operations` +- `/docs/administration/compact` (56) → `/reference/v4/database/compaction` +- `/docs/reference/resources/query-optimization` (55) → `/reference/v4/resources/query-optimization` +- `/docs/administration/jobs` (54) → `/reference/v4/database/jobs` +- `/docs/developers/operations-api/analytics` (145) → `/reference/v4/analytics/operations` +- `/docs/developers/operations-api/quickstart-examples` (145) → `/reference/v4/operations-api/operations` +- `/docs/reference/content-types` (70) → `/reference/v4/rest/content-types` +- `/docs/reference/headers` (46) → `/reference/v4/rest/headers` +- `/docs/developers/security/certificate-verification` (46) → `/reference/v4/security/certificate-verification` +- `/docs/administration/logging/audit-logging` (72) → `/reference/v4/logging/overview` +- `/docs/developers/clustering` (72) → `/reference/v4/replication/clustering` +- `/docs/administration/logging/transaction-logging` (45) → `/reference/v4/logging/overview` +- `/docs/reference/clustering` (31) → `/reference/v4/replication/clustering` +- `/docs/reference/clustering/enabling-clustering` (25) → `/reference/v4/replication/clustering` +- `/docs/reference/clustering/establishing-routes` (20) → `/reference/v4/replication/clustering` +- `/docs/reference/clustering/subscription-overview` (19) → `/reference/v4/replication/clustering` +- `/docs/reference/sql-guide` (26) → 
`/reference/v4/database/sql`
+- `/docs/reference/sql-guide/json-search` (23) → `/reference/v4/database/sql`
+- `/docs/developers/security/mtls-auth` (32) → `/reference/v4/security/mtls-authentication`
+- `/docs/developers/components/built-in` (26) → `/reference/v4/components/extension-api`
+- `/docs/developers/components/reference` (25) → `/reference/v4/components/extension-api`
+- `/docs/developers/components` (33) → `/reference/v4/components/overview`
+- `/docs/administration/harper-studio/create-account` (45) → `/reference/v4/studio/overview`
+
+## Paths Without a Direct /reference/v4/ Equivalent (special handling)
+
+- `/docs/deployments/install-harper/*` — installation content, no equivalent in new ref
+- `/docs/deployments/harper-cloud/*` — redirect to `/reference/v4/legacy/cloud` (or keep existing)
+- `/docs/deployments/upgrade-hdb-instance` — keep existing redirect or drop
+- `/docs/administration/harper-studio/*` (most subpages) — redirect to `/reference/v4/studio/overview`
+- `/docs/getting-started/*` — already redirects to `/learn/`
+- `/docs/foundations/*` — already redirects to `/learn/`
+
+## Versioned Doc Paths (/docs/4.X/) in Analytics
+
+Low traffic but some exist.
Recommend a general catch-all pattern: +- `/docs/4.X/developers/...` → strip version prefix, apply same rules as `/docs/developers/...` +- `/docs/4.X/reference/...` → strip version prefix, apply same rules as `/docs/reference/...` +- Alternative: redirect `/docs/4.X/...` → `/docs/...` (simpler, single hop) + +## Special Notes for Non-Reference Sections + +### /learn/ — needs few/no new redirects +- Already has redirects for `/getting-started/*` and `/foundations/*` +- `/learn/developers/coming-soon` and `/learn/administration/coming-soon` are real pages, no redirects needed + +### /release-notes/ — existing redirects are fine +- The existing `createRedirects` logic for release-notes path variants (old naming) is worth keeping +- No new redirects needed unless we change the release-notes structure + +### /fabric/ — no redirects needed +- Brand new section with no old paths to redirect from + +## Old redirects.ts Issues + +The existing file has: +1. Many rules dragged from very old docs (HarperDB Studio → Harper Studio, HarperDB Cloud, custom-functions etc.) that are still valid but very old +2. `withBase()` abstraction that adds complexity — the basePath was used when docs were at `/docs/` but now everything is at root +3. Separate `generateRedirects()` and `createRedirects()` (wildcard) functions — the split is conceptually fine +4. Some rules still point to old paths like `/administration/harper-studio/`, `/deployments/install-harper/` etc. which still exist in the current site + +## Approach for New redirects.ts + +1. **Keep** existing rules that redirect very-old paths (pre-Harper) → current paths — these are still valid +2. **Add** new rules for old `/docs/developers/`, `/docs/reference/`, `/docs/administration/`, `/docs/deployments/` → `/reference/v4/` +3. 
**Use patterns** for versioned paths `/docs/4.X/...` — either: + - Pattern: catch-all redirect `/docs/4.X/` → drop version and apply same rules (cleaner) + - Or just let them 404 — traffic is low (<30 views per page) +4. **Remove** now-redundant `basePath` abstraction since redirect targets are absolute paths +5. **Simplify** `createRedirects` wildcard function to focus on the actual patterns needed + +## Decisions (Confirmed) + +- `/docs/` root (2854 views) → redirect to `/` (site root) +- `/docs/developers/applications/caching` → `/reference/v4/resources/overview` (add comment: eventually redirect to a dedicated learn page for database caching) +- `/docs/reference/globals` → `/reference/v4/components/javascript-environment` ✓ +- Versioned `/docs/4.X/*` paths → **catch-all to `/reference/v4/`** (not per-path mappings; traffic is low) +- No `basePath`/`withBase()` abstraction — all redirect targets are absolute paths, site is served at `/` +- **Clean break**: only keep rules for paths that appear in pageview data. Paths with <10 views are marked for review — we may 404 those. +- The redirect plugin is commented out in `docusaurus.config.ts` — uncomment it as part of this work. 
diff --git a/redirects.ts b/redirects.ts index 48fa1a7c..638f3fad 100644 --- a/redirects.ts +++ b/redirects.ts @@ -1,364 +1,241 @@ // Redirect configuration for Docusaurus client-side redirects -// Based on GitBook .gitbook.yaml redirects +// Based on pageview analytics (Oct 2025 – Feb 2026) from docs.harper.fast +// +// Sections with NO redirects needed: +// /reference/ — new section, no old paths point here +// /learn/ — already has redirects for /getting-started/* and /foundations/* +// /release-notes/ — existing createRedirects logic handles old naming variants +// /fabric/ — new section, no old paths type RedirectRule = { to: string; from: string | string[]; }; -// Release notes redirects (not affected by base path) -function generateReleaseNotesRedirects(): RedirectRule[] { - // Generate redirects for all old release notes paths to new location - const versions = ['v1-alby', 'v2-penny', 'v3-monkey', 'v4-tucker']; - const redirects: RedirectRule[] = []; - - // Main release notes index - redirect from current version (4.6) path - redirects.push({ - from: '/docs/technical-details/release-notes', - to: '/release-notes', - }); - - // Also redirect from each versioned docs path - const docVersions = ['4.1', '4.2', '4.3', '4.4', '4.5', '4.6']; - for (const docVersion of docVersions) { - redirects.push({ - from: `/docs/${docVersion}/technical-details/release-notes`, - to: '/release-notes', - }); - } - - // Version index pages will be handled by the wildcard createRedirects function - // to avoid duplicates - - return redirects; -} - -// Documentation redirects -function generateDocsRedirects(basePath: string): RedirectRule[] { - // Helper to add base path to a route - const withBase = (path: string) => { - // If basePath is just '/', return path as-is to avoid double slashes - return basePath === '/' ? 
path : `${basePath}${path}`; - }; - - const redirects: RedirectRule[] = []; - - // Only add root redirect if docs are not at root - if (basePath !== '/') { - redirects.push({ - from: '/', - to: basePath, - }); - } - - redirects.push( - // Operations API - { - from: withBase('/developers/operations-api/utilities'), - to: withBase('/developers/operations-api/system-operations'), - }, - - // Installation paths - { from: withBase('/install-harperdb'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/linux'), to: withBase('/deployments/install-harper/linux') }, - { from: withBase('/install-harperdb/other'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/docker'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/mac'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/windows'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/linux-quickstart'), to: withBase('/deployments/install-harper/linux') }, - { from: withBase('/install-harperdb/offline'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/install-harperdb/node-ver-requirement'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/deployments/install-harperdb'), to: withBase('/deployments/install-harper/') }, - { from: withBase('/deployments/install-harperdb/linux'), to: withBase('/deployments/install-harper/linux') }, - { - from: withBase('/getting-started/install-harper'), - to: '/learn/getting-started/install-and-connect-harper', - }, - - // Harper Studio (old HarperDB Studio paths) - { from: withBase('/harperdb-studio'), to: withBase('/administration/harper-studio/') }, - { from: withBase('/harperdb-studio/create-account'), to: withBase('/administration/harper-studio/create-account') }, - { - from: withBase('/harperdb-studio/login-password-reset'), - to: 
withBase('/administration/harper-studio/login-password-reset'), - }, - { - from: [withBase('/harperdb-studio/resources'), withBase('/administration/harper-studio/resources')], - to: withBase('/administration/harper-studio/'), - }, - { from: withBase('/harperdb-studio/organizations'), to: withBase('/administration/harper-studio/organizations') }, - { from: withBase('/harperdb-studio/instances'), to: withBase('/administration/harper-studio/instances') }, - { - from: withBase('/harperdb-studio/query-instance-data'), - to: withBase('/administration/harper-studio/query-instance-data'), - }, - { - from: withBase('/harperdb-studio/manage-schemas-browse-data'), - to: withBase('/administration/harper-studio/manage-databases-browse-data'), - }, - { - from: [withBase('/harperdb-studio/manage-charts'), withBase('/administration/harper-studio/manage-charts')], - to: withBase('/administration/harper-studio/query-instance-data'), - }, - { - from: withBase('/harperdb-studio/manage-clustering'), - to: withBase('/administration/harper-studio/manage-replication'), - }, - { - from: withBase('/harperdb-studio/manage-instance-users'), - to: withBase('/administration/harper-studio/manage-instance-users'), - }, - { - from: withBase('/harperdb-studio/manage-instance-roles'), - to: withBase('/administration/harper-studio/manage-instance-users'), - }, - { - from: withBase('/harperdb-studio/manage-functions'), - to: withBase('/administration/harper-studio/manage-applications'), - }, - { - from: withBase('/harperdb-studio/instance-metrics'), - to: withBase('/administration/harper-studio/instance-metrics'), - }, - { - from: withBase('/harperdb-studio/instance-configuration'), - to: withBase('/administration/harper-studio/instance-configuration'), - }, - { - from: withBase('/harperdb-studio/enable-mixed-content'), - to: withBase('/administration/harper-studio/enable-mixed-content'), - }, - - // Harper Cloud (old HarperDB Cloud paths) - { from: withBase('/harperdb-cloud'), to: 
withBase('/deployments/harper-cloud/') }, - - // Security - { from: withBase('/security'), to: withBase('/developers/security/') }, - { from: withBase('/security/jwt-auth'), to: withBase('/developers/security/jwt-auth') }, - { from: withBase('/security/basic-auth'), to: withBase('/developers/security/basic-auth') }, - { from: withBase('/security/configuration'), to: withBase('/developers/security/configuration') }, - { from: withBase('/security/users-and-roles'), to: withBase('/developers/security/users-and-roles') }, - - // Custom Functions → Applications - { from: withBase('/custom-functions'), to: withBase('/developers/applications/') }, - { from: withBase('/custom-functions/define-routes'), to: withBase('/developers/applications/define-routes') }, - { - from: [withBase('/custom-functions/using-npm-git'), withBase('/developers/custom-functions/create-project')], - to: withBase('/developers/applications/'), - }, - { from: withBase('/custom-functions/custom-functions-operations'), to: withBase('/developers/operations-api/') }, - { - from: withBase('/custom-functions/debugging-custom-function'), - to: withBase('/developers/applications/debugging'), - }, - - // SQL Guide Root Page - { from: withBase('/sql-guide'), to: withBase('/reference/sql-guide/') }, - { from: withBase('/developers/sql-guide'), to: withBase('/reference/sql-guide/') }, - - // Clustering Root Page - { from: withBase('/clustering'), to: withBase('/reference/clustering/') }, - - // CLI - { from: withBase('/harperdb-cli'), to: withBase('/deployments/harper-cli') }, - { from: withBase('/deployments/harperdb-cli'), to: withBase('/deployments/harper-cli') }, - - // Top-level paths - { from: withBase('/configuration'), to: withBase('/deployments/configuration') }, - { from: withBase('/logging'), to: withBase('/administration/logging/standard-logging') }, - { from: withBase('/transaction-logging'), to: withBase('/administration/logging/transaction-logging') }, - { from: withBase('/audit-logging'), to: 
withBase('/administration/logging/audit-logging') }, - { from: withBase('/jobs'), to: withBase('/administration/jobs') }, - { from: withBase('/upgrade-hdb-instance'), to: withBase('/deployments/upgrade-hdb-instance') }, - { from: withBase('/operations-api'), to: withBase('/developers/operations-api/') }, - { from: withBase('/rest'), to: withBase('/developers/rest') }, - { from: withBase('/api'), to: withBase('/developers/operations-api/') }, - - // File rename redirect - { from: withBase('/administration/logging/logging'), to: withBase('/administration/logging/standard-logging') }, - - // Old Technical Details -> Reference paths - { from: withBase('/technical-details/reference'), to: withBase('/reference/') }, - - // Getting Started and Foundations pages to new Learn section - { from: withBase('/getting-started'), to: '/learn/' }, - { from: withBase('/4.6/getting-started'), to: '/learn/' }, - { from: withBase('/4.5/getting-started'), to: '/learn/' }, - { from: withBase('/4.4/getting-started'), to: '/learn/' }, - - { - from: withBase('/getting-started/installation'), - to: '/learn/getting-started/install-and-connect-harper', - }, - { - from: withBase('/4.6/getting-started/installation'), - to: '/learn/getting-started/install-and-connect-harper', - }, - { - from: withBase('/4.5/getting-started/installation'), - to: '/learn/getting-started/install-and-connect-harper', - }, - { - from: withBase('/4.4/getting-started/installation'), - to: '/learn/getting-started/install-and-connect-harper', - }, - - { from: withBase('/getting-started/quickstart'), to: '/learn/' }, - { from: withBase('/4.6/getting-started/quickstart'), to: '/learn/' }, - { from: withBase('/4.5/getting-started/quickstart'), to: '/learn/' }, - { from: withBase('/4.4/getting-started/quickstart'), to: '/learn/' }, - - { from: withBase('/foundations/harper-architecture'), to: '/learn/' }, - { from: withBase('/4.6/foundations/harper-architecture'), to: '/learn/' }, - { from: 
withBase('/4.5/foundations/harper-architecture'), to: '/learn/' }, - { from: withBase('/4.4/foundations/harper-architecture'), to: '/learn/' }, - - { from: withBase('/foundations/core-concepts'), to: '/learn/' }, - { from: withBase('/4.6/foundations/core-concepts'), to: '/learn/' }, - { from: withBase('/4.5/foundations/core-concepts'), to: '/learn/' }, - { from: withBase('/4.4/foundations/core-concepts'), to: '/learn/' }, - - { from: withBase('/foundations/use-cases'), to: '/learn/' }, - { from: withBase('/4.6/foundations/use-cases'), to: '/learn/' }, - { from: withBase('/4.5/foundations/use-cases'), to: '/learn/' }, - { from: withBase('/4.4/foundations/use-cases'), to: '/learn/' } - ); - - return redirects; -} - -// Combine all redirects -export function generateRedirects(basePath: string): RedirectRule[] { - return [...generateReleaseNotesRedirects(), ...generateDocsRedirects(basePath)]; -} - -// For backward compatibility, export a default set with empty base path -export const redirects = generateRedirects(''); - -// Function to create wildcard redirects for moved sections -// This handles dynamic redirects for paths not explicitly defined in the main redirect list -export function createRedirects(existingPath: string, basePath: string = ''): string[] | undefined { - const redirects: string[] = []; - - // Handle release notes redirects from old location to new - if (existingPath.startsWith('/release-notes/')) { - // Extract the path after /release-notes/ - const subpath = existingPath.replace('/release-notes/', ''); - - // Handle old version naming (4.tucker -> v4-tucker, etc.) 
- let oldSubpath = subpath; - const versionMap: Record = { - 'v1-alby': '1.alby', - 'v2-penny': '2.penny', - 'v3-monkey': '3.monkey', - 'v4-tucker': '4.tucker', - }; - - // Check if the path starts with a new version name and convert to old format - for (const [newName, oldName] of Object.entries(versionMap)) { - if (subpath.startsWith(`${newName}/`) || subpath === newName) { - oldSubpath = subpath.replace(newName, oldName); - break; - } - } - - // Add redirects from current version docs (4.6 is served at /docs/) - redirects.push(`/docs/technical-details/release-notes/${subpath}`); - if (oldSubpath !== subpath) { - redirects.push(`/docs/technical-details/release-notes/${oldSubpath}`); - } - - // Also redirect from all versioned docs paths - const versions = ['4.1', '4.2', '4.3', '4.4', '4.5', '4.6']; - for (const version of versions) { - redirects.push(`/docs/${version}/technical-details/release-notes/${subpath}`); - if (oldSubpath !== subpath) { - redirects.push(`/docs/${version}/technical-details/release-notes/${oldSubpath}`); - } - } - } - - // Only create wildcard redirects for paths that aren't already explicitly defined - // Check if this is a path we handle with wildcard redirects - - // Harper Studio - only for subpaths not already defined - if (existingPath.startsWith(`${basePath}/administration/harper-studio/`)) { - const subpath = existingPath.replace(`${basePath}/administration/harper-studio/`, ''); - // Skip paths that are already explicitly redirected - const explicitStudioPaths = [ - 'create-account', - 'login-password-reset', - 'organizations', - 'instances', - 'query-instance-data', - 'manage-databases-browse-data', - 'manage-replication', - 'manage-instance-users', - 'manage-applications', - 'instance-metrics', - 'instance-configuration', - 'enable-mixed-content', - ]; - if (subpath && !explicitStudioPaths.includes(subpath)) { - redirects.push(`${basePath}/administration/harperdb-studio/${subpath}`); - } - } - - // Harper Cloud - only for subpaths 
not already defined - if (existingPath.startsWith(`${basePath}/deployments/harper-cloud/`)) { - const subpath = existingPath.replace(`${basePath}/deployments/harper-cloud/`, ''); - // The main harper-cloud redirect is explicit, only handle other subpaths - if (subpath) { - redirects.push(`${basePath}/deployments/harperdb-cloud/${subpath}`); - } - } - - // Install Harper - only for subpaths not already defined - if (existingPath.startsWith(`${basePath}/deployments/install-harper/`)) { - const subpath = existingPath.replace(`${basePath}/deployments/install-harper/`, ''); - // Skip 'linux' as it's explicitly defined - if (subpath && subpath !== 'linux') { - redirects.push(`${basePath}/deployments/install-harperdb/${subpath}`); - } - } - - // Custom Functions - handle subpaths - if (existingPath.startsWith(`${basePath}/developers/custom-functions/`)) { - const subpath = existingPath.replace(`${basePath}/developers/custom-functions/`, ''); - // Skip paths that are explicitly defined - const explicitCustomPaths = ['define-routes', 'debugging-custom-function', 'example-projects']; - if (subpath && !explicitCustomPaths.includes(subpath)) { - redirects.push(`${basePath}/custom-functions/${subpath}`); - } - } - - if (existingPath.startsWith(`${basePath}/reference/sql-guide`)) { - const subpath = existingPath.replace(`${basePath}/reference/sql-guide`, ''); - if (subpath) { - redirects.push(`${basePath}/sql-guide${subpath}`); - redirects.push(`${basePath}/developers/sql-guide${subpath}`); - } - } - - if (existingPath.startsWith(`${basePath}/reference/clustering`)) { - const subpath = existingPath.replace(`${basePath}/reference/clustering`, ''); - if (subpath) { - redirects.push(`${basePath}/developers/clustering${subpath}`); - } - } - - // Old Technical Details -> Reference paths - if (existingPath.startsWith(`${basePath}/reference/`)) { - const subpath = existingPath.replace(`${basePath}/reference/`, ''); - if (subpath) { - 
redirects.push(`${basePath}/technical-details/reference/${subpath}`);
-    }
-  }
-
-  // Don't create wildcard redirects for these as they're all explicitly defined:
-  // - /developers/security/* (all subpaths are explicit)
-  // - /deployments/harper-cli (explicit)
-  // - /developers/operations-api/* (has explicit redirects)
-
-  return redirects.length > 0 ? redirects : undefined;
+// ─── Static redirect rules ───────────────────────────────────────────────────
+// All paths sourced from GA pageview data (Oct 2025–Feb 2026).
+// Paths with <10 views are marked LOW TRAFFIC and may be dropped in a future cleanup.
+
+export const redirects: RedirectRule[] = [
+	// ── Docs root ──────────────────────────────────────────────────────────────
+	// Note: bare /docs (no trailing slash) is a real page built from
+	// docs/index.mdx and is not redirected here — that page itself sends users
+	// to the right place. The trailing-slash variant is redirected to the site
+	// root per the migration decision (2854 views).
+	{ from: '/docs/', to: '/' },
+
+	// ── Getting Started / Foundations → Learn ─────────────────────────────────
+	{ from: '/docs/getting-started', to: '/learn' },
+	{ from: '/docs/getting-started/quickstart', to: '/learn' },
+	{ from: '/docs/getting-started/installation', to: '/learn/getting-started/install-and-connect-harper' },
+	{ from: '/docs/getting-started/install-harper', to: '/learn/getting-started/install-and-connect-harper' },
+	// LOW TRAFFIC (<10 views):
+	{ from: '/docs/getting-started/what-is-harper', to: '/learn' },
+	{ from: '/docs/getting-started/harper-concepts', to: '/learn' },
+	{ from: '/docs/foundations/harper-architecture', to: '/learn' },
+	{ from: '/docs/foundations/core-concepts', to: '/learn' },
+	{ from: '/docs/foundations/use-cases', to: '/learn' },
+
+	// ── Operations API ─────────────────────────────────────────────────────────
+	{ from: '/docs/developers/operations-api', to: '/reference/v4/operations-api/overview' },
+	{ from: '/docs/developers/operations-api/nosql-operations', to:
'/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/databases-and-tables', to: '/reference/v4/database/overview' }, + { from: '/docs/developers/operations-api/components', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/advanced-json-sql-examples', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/bulk-operations', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/system-operations', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/configuration', to: '/reference/v4/configuration/operations' }, + { from: '/docs/developers/operations-api/users-and-roles', to: '/reference/v4/users-and-roles/operations' }, + { from: '/docs/developers/operations-api/analytics', to: '/reference/v4/analytics/operations' }, + { from: '/docs/developers/operations-api/quickstart-examples', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/certificate-management', to: '/reference/v4/security/certificate-management' }, + { from: '/docs/developers/operations-api/custom-functions', to: '/reference/v4/legacy/custom-functions' }, + { from: '/docs/developers/operations-api/jobs', to: '/reference/v4/database/jobs' }, + { from: '/docs/developers/operations-api/logs', to: '/reference/v4/logging/operations' }, + { from: '/docs/developers/operations-api/sql-operations', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/operations-api/clustering-nats', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/operations-api/clustering', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/operations-api/token-authentication', to: '/reference/v4/security/jwt-authentication' }, + { from: '/docs/developers/operations-api/registration', to: '/reference/v4/operations-api/operations' }, + // LOW TRAFFIC (<10 
views): + { from: '/docs/developers/operations-api/utilities', to: '/reference/v4/operations-api/operations' }, + + // ── Applications / Components ────────────────────────────────────────────── + { from: '/docs/developers/applications', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/applications/defining-schemas', to: '/reference/v4/database/schema' }, + { + // TODO: eventually redirect to a dedicated learn page for database caching + from: '/docs/developers/applications/caching', + to: '/reference/v4/resources/overview', + }, + { from: '/docs/developers/applications/data-loader', to: '/reference/v4/database/data-loader' }, + { from: '/docs/developers/applications/web-applications', to: '/reference/v4/components/applications' }, + { from: '/docs/developers/applications/debugging', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/applications/define-routes', to: '/reference/v4/fastify-routes/overview' }, + { from: '/docs/developers/applications/defining-roles', to: '/reference/v4/users-and-roles/overview' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/developers/applications/', to: '/reference/v4/components/overview' }, + + // ── Old /developers/components/* (separate from /reference/components/*) ── + { from: '/docs/developers/components', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/components/built-in', to: '/reference/v4/components/extension-api' }, + { from: '/docs/developers/components/reference', to: '/reference/v4/components/extension-api' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/developers/components/writing-extensions', to: '/reference/v4/components/extension-api' }, + { from: '/docs/developers/components/managing', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/miscellaneous/sdks', to: '/reference/v4/components/overview' }, + + // ── Security ─────────────────────────────────────────────────────────────── + { from: '/docs/developers/security', to: 
'/reference/v4/security/overview' }, + { from: '/docs/developers/security/configuration', to: '/reference/v4/security/configuration' }, + { from: '/docs/developers/security/users-and-roles', to: '/reference/v4/users-and-roles/overview' }, + { from: '/docs/developers/security/jwt-auth', to: '/reference/v4/security/jwt-authentication' }, + { from: '/docs/developers/security/basic-auth', to: '/reference/v4/security/basic-authentication' }, + { from: '/docs/developers/security/certificate-management', to: '/reference/v4/security/certificate-management' }, + { from: '/docs/developers/security/certificate-verification', to: '/reference/v4/security/certificate-verification' }, + { from: '/docs/developers/security/mtls-auth', to: '/reference/v4/security/mtls-authentication' }, + + // ── Replication / Clustering ─────────────────────────────────────────────── + { from: '/docs/developers/replication', to: '/reference/v4/replication/overview' }, + { from: '/docs/developers/replication/sharding', to: '/reference/v4/replication/sharding' }, + { from: '/docs/developers/clustering', to: '/reference/v4/replication/clustering' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/developers/clustering/certificate-management', to: '/reference/v4/security/certificate-management' }, + { from: '/docs/developers/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/clustering/creating-a-cluster-user', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/clustering/things-worth-knowing', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/clustering/subscription-overview', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/replication/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, + + // ── REST / Real-time ──────────────────────────────────────────────────────── + { from: '/docs/developers/rest', to: '/reference/v4/rest/overview' }, + { from: 
'/docs/developers/real-time', to: '/reference/v4/rest/websockets' }, + { from: '/docs/developers/sql-guide', to: '/reference/v4/database/sql' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/developers/sql-guide/functions', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/sql-guide/date-functions', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/sql-guide/features-matrix', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/sql-guide/json-search', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/sql-guide/sql-geospatial-functions', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/sql-guide/reserved-word', to: '/reference/v4/database/sql' }, + + // ── Configuration ───────────────────────────────────────────────────────── + { from: '/docs/deployments/configuration', to: '/reference/v4/configuration/overview' }, + + // ── CLI ─────────────────────────────────────────────────────────────────── + { from: '/docs/deployments/harper-cli', to: '/reference/v4/cli/overview' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/deployments/harperdb-cli', to: '/reference/v4/cli/overview' }, + { from: '/docs/administration/harperdb-cli', to: '/reference/v4/cli/overview' }, + + // ── Install / Upgrade (no equivalent page in /reference/v4/) ────────────── + // These remain as self-referential paths that may still exist on the live site. 
+ // LOW TRAFFIC (<10 views for most subpaths): + { from: '/docs/deployments/upgrade-hdb-instance', to: '/learn' }, + { from: '/docs/administration/upgrade-hdb-instance', to: '/learn' }, + + // ── Harper Cloud → Legacy ───────────────────────────────────────────────── + { from: '/docs/deployments/harper-cloud', to: '/reference/v4/legacy/cloud' }, + // LOW TRAFFIC (<10 views each): + { from: '/docs/deployments/harperdb-cloud', to: '/reference/v4/legacy/cloud' }, + + // ── Studio ──────────────────────────────────────────────────────────────── + { from: '/docs/administration/harper-studio', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/create-account', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/login-password-reset', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/instances', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/instance-metrics', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/instance-configuration', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-databases-browse-data', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-instance-users', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-applications', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/enable-mixed-content', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/query-instance-data', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/organizations', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-instance-roles', to: '/reference/v4/studio/overview' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/administration/harperdb-studio/', to: 
'/reference/v4/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-applications', to: '/reference/v4/studio/overview' }, + + // ── Logging ─────────────────────────────────────────────────────────────── + { from: '/docs/administration/logging', to: '/reference/v4/logging/overview' }, + { from: '/docs/administration/logging/standard-logging', to: '/reference/v4/logging/overview' }, + { from: '/docs/administration/logging/audit-logging', to: '/reference/v4/logging/overview' }, + { from: '/docs/administration/logging/transaction-logging', to: '/reference/v4/logging/overview' }, + + // ── Administration: other ───────────────────────────────────────────────── + { from: '/docs/administration/cloning', to: '/reference/v4/replication/overview' }, + { from: '/docs/administration/compact', to: '/reference/v4/database/compaction' }, + { from: '/docs/administration/jobs', to: '/reference/v4/database/jobs' }, + + // ── Old /docs/reference/* ───────────────────────────────────────────────── + { from: '/docs/reference', to: '/reference/v4' }, + { from: '/docs/reference/globals', to: '/reference/v4/components/javascript-environment' }, + { from: '/docs/reference/resources', to: '/reference/v4/resources/overview' }, + { from: '/docs/reference/resources/instance-binding', to: '/reference/v4/resources/resource-api' }, + { from: '/docs/reference/resources/migration', to: '/reference/v4/database/data-loader' }, + { from: '/docs/reference/resources/query-optimization', to: '/reference/v4/resources/query-optimization' }, + { from: '/docs/reference/components', to: '/reference/v4/components/overview' }, + { from: '/docs/reference/components/built-in-extensions', to: '/reference/v4/components/extension-api' }, + { from: '/docs/reference/components/extensions', to: '/reference/v4/components/extension-api' }, + { from: '/docs/reference/components/plugins', to: '/reference/v4/components/plugin-api' }, + { from: '/docs/reference/components/applications', to: 
'/reference/v4/components/applications' }, + { from: '/docs/reference/components/configuration', to: '/reference/v4/components/overview' }, + { from: '/docs/reference/analytics', to: '/reference/v4/analytics/overview' }, + { from: '/docs/reference/dynamic-schema', to: '/reference/v4/database/schema' }, + { from: '/docs/reference/data-types', to: '/reference/v4/database/schema' }, + { from: '/docs/reference/blob', to: '/reference/v4/database/schema' }, + { from: '/docs/reference/transactions', to: '/reference/v4/database/transaction' }, + { from: '/docs/reference/graphql', to: '/reference/v4/graphql-querying/overview' }, + { from: '/docs/reference/content-types', to: '/reference/v4/rest/content-types' }, + { from: '/docs/reference/headers', to: '/reference/v4/rest/headers' }, + { from: '/docs/reference/roles', to: '/reference/v4/users-and-roles/overview' }, + { from: '/docs/reference/storage-algorithm', to: '/reference/v4/database/storage-algorithm' }, + { from: '/docs/reference/limits', to: '/reference/v4/database/schema' }, + { from: '/docs/reference/architecture', to: '/reference/v4' }, + { from: '/docs/reference/clustering', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/clustering/establishing-routes', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/clustering/subscription-overview', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/clustering/managing-subscriptions', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/clustering/things-worth-knowing', to: '/reference/v4/replication/clustering' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/reference/clustering/certificate-management', to: '/reference/v4/security/certificate-management' }, + { from: '/docs/reference/clustering/creating-a-cluster-user', to: '/reference/v4/replication/clustering' }, + { from: 
'/docs/reference/clustering/naming-a-node', to: '/reference/v4/replication/clustering' }, + { from: '/docs/reference/sql-guide', to: '/reference/v4/database/sql' }, + { from: '/docs/reference/sql-guide/json-search', to: '/reference/v4/database/sql' }, + // LOW TRAFFIC (<10 views): + { from: '/docs/reference/sql-guide/date-functions', to: '/reference/v4/database/sql' }, + { from: '/docs/reference/sql-guide/functions', to: '/reference/v4/database/sql' }, + { from: '/docs/reference/sql-guide/sql-geospatial-functions', to: '/reference/v4/database/sql' }, + + // ── Old /technical-details/reference/* (pre-v4 paths) ──────────────────── + // LOW TRAFFIC (<10 views): + { from: '/technical-details/reference/resources', to: '/reference/v4/resources/overview' }, + + // ── Old /docs/administration/administration ──────────────────────────────── + // LOW TRAFFIC (<10 views): + { from: '/docs/administration/administration', to: '/reference/v4' }, + { from: '/docs/administration', to: '/reference/v4' }, + { from: '/docs/deployments', to: '/reference/v4' }, + + // ── Release notes ───────────────────────────────────────────────────────── + // Only paths seen in pageview data (Oct 2025–Feb 2026). The old docs embedded + // release notes under /docs/technical-details/release-notes/ using a dot-separated + // version name format (e.g. "4.tucker" instead of "v4-tucker"). + { from: '/docs/technical-details/release-notes', to: '/release-notes' }, + // LOW TRAFFIC (<16 views each): + { from: '/docs/4.3/technical-details/release-notes/4.tucker/2.1.1', to: '/release-notes/v2-penny/2.1.1' }, + { from: '/docs/4.3/technical-details/release-notes/4.tucker/1.3.1', to: '/release-notes/v1-alby/1.3.1' }, + { from: '/docs/4.3/technical-details/release-notes/4.tucker/3.0.0', to: '/release-notes/v3-monkey/3.0.0' }, +]; + +// ─── Wildcard / dynamic redirects ──────────────────────────────────────────── +// Called by Docusaurus for every existing page path to generate inbound redirects. 
+ +export function createRedirects(_existingPath: string): string[] | undefined { + // No dynamic redirects needed at this time. + // The versioned /docs/4.X/ roots are real Docusaurus-built directories and + // cannot be redirected via this plugin (postbuild.js would conflict). + return undefined; } From 487644598ddf55344c3c1c0e908ebabeeb4c84b4 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 16:10:54 -0600 Subject: [PATCH 36/51] delete old docs content --- docs/administration/_category_.json | 10 - docs/administration/administration.md | 32 - docs/administration/cloning.md | 153 -- docs/administration/compact.md | 60 - .../harper-studio/create-account.md | 27 - .../harper-studio/enable-mixed-content.md | 9 - docs/administration/harper-studio/index.md | 21 - .../harper-studio/instance-configuration.md | 108 - .../harper-studio/instance-metrics.md | 16 - .../administration/harper-studio/instances.md | 146 -- .../harper-studio/login-password-reset.md | 42 - .../harper-studio/manage-applications.md | 59 - .../manage-databases-browse-data.md | 123 -- .../harper-studio/manage-instance-roles.md | 77 - .../harper-studio/manage-instance-users.md | 53 - .../harper-studio/manage-replication.md | 90 - .../harper-studio/organizations.md | 109 - .../harper-studio/query-instance-data.md | 52 - docs/administration/jobs.md | 112 -- docs/administration/logging/audit-logging.md | 126 -- docs/administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - docs/deployments/_category_.json | 10 - docs/deployments/configuration.md | 1556 --------------- docs/deployments/harper-cli.md | 194 -- docs/deployments/harper-cloud/alarms.md | 20 - docs/deployments/harper-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - docs/deployments/harper-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - docs/deployments/install-harper/index.md | 94 - docs/deployments/install-harper/linux.md | 
225 --- docs/deployments/upgrade-hdb-instance.md | 140 -- docs/developers/_category_.json | 10 - docs/developers/applications/caching.md | 325 --- docs/developers/applications/data-loader.md | 195 -- docs/developers/applications/debugging.md | 39 - docs/developers/applications/define-routes.md | 119 -- .../developers/applications/defining-roles.md | 85 - .../applications/defining-schemas.md | 272 --- docs/developers/applications/index.md | 244 --- .../applications/web-applications.md | 63 - .../advanced-json-sql-examples.md | 1775 ----------------- docs/developers/operations-api/analytics.md | 121 -- .../operations-api/bulk-operations.md | 255 --- .../operations-api/certificate-management.md | 124 -- .../operations-api/clustering-nats.md | 486 ----- docs/developers/operations-api/clustering.md | 355 ---- docs/developers/operations-api/components.md | 553 ----- .../operations-api/configuration.md | 135 -- .../operations-api/custom-functions.md | 281 --- .../operations-api/databases-and-tables.md | 388 ---- docs/developers/operations-api/index.md | 55 - docs/developers/operations-api/jobs.md | 87 - docs/developers/operations-api/logs.md | 733 ------- .../operations-api/nosql-operations.md | 389 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 231 --- .../operations-api/sql-operations.md | 127 -- .../operations-api/system-operations.md | 195 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- docs/developers/real-time.md | 180 -- docs/developers/replication/index.md | 300 --- docs/developers/replication/sharding.md | 167 -- docs/developers/rest.md | 403 ---- docs/developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 153 -- .../security/certificate-verification.md | 502 ----- docs/developers/security/configuration.md | 40 - docs/developers/security/index.md | 23 - docs/developers/security/jwt-auth.md | 96 - 
docs/developers/security/mtls-auth.md | 24 - docs/developers/security/users-and-roles.md | 273 --- docs/index.mdx | 55 - docs/reference/_category_.json | 6 - docs/reference/analytics.md | 173 -- docs/reference/architecture.md | 42 - docs/reference/blob.md | 146 -- .../clustering/certificate-management.md | 82 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - docs/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 199 -- docs/reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - docs/reference/components/applications.md | 221 -- .../components/built-in-extensions.md | 319 --- docs/reference/components/configuration.md | 89 - docs/reference/components/extensions.md | 187 -- docs/reference/components/index.md | 39 - docs/reference/components/plugins.md | 629 ------ docs/reference/content-types.md | 35 - docs/reference/data-types.md | 60 - docs/reference/dynamic-schema.md | 148 -- docs/reference/globals.md | 422 ---- docs/reference/graphql.md | 254 --- docs/reference/headers.md | 12 - docs/reference/index.md | 9 - docs/reference/limits.md | 37 - docs/reference/resources/index.md | 796 -------- docs/reference/resources/instance-binding.md | 721 ------- docs/reference/resources/migration.md | 137 -- .../reference/resources/query-optimization.md | 37 - docs/reference/roles.md | 117 -- docs/reference/sql-guide/date-functions.md | 227 --- docs/reference/sql-guide/features-matrix.md | 88 - docs/reference/sql-guide/functions.md | 145 -- docs/reference/sql-guide/index.md | 88 - docs/reference/sql-guide/json-search.md | 177 -- docs/reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- docs/reference/storage-algorithm.md | 27 - docs/reference/transactions.md | 40 - 
.../add-ons-and-sdks/google-data-studio.md | 37 - .../version-4.1/add-ons-and-sdks/index.md | 7 - versioned_docs/version-4.1/audit-logging.md | 124 -- .../clustering/certificate-management.md | 70 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.1/clustering/index.md | 39 - .../clustering/managing-subscriptions.md | 168 -- .../version-4.1/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - versioned_docs/version-4.1/configuration.md | 790 -------- .../custom-functions/create-project.md | 40 - .../custom-functions-operations.md | 45 - .../debugging-custom-function.md | 97 - .../custom-functions/define-helpers.md | 34 - .../custom-functions/define-routes.md | 113 -- .../custom-functions/example-projects.md | 37 - .../custom-functions/host-static.md | 21 - .../version-4.1/custom-functions/index.md | 29 - .../requirements-definitions.md | 79 - .../custom-functions/restarting-server.md | 16 - .../version-4.1/custom-functions/templates.md | 7 - .../custom-functions/using-npm-git.md | 11 - .../getting-started/getting-started.md | 54 - versioned_docs/version-4.1/harperdb-cli.md | 115 -- .../version-4.1/harperdb-cloud/alarms.md | 23 - .../version-4.1/harperdb-cloud/index.md | 7 - .../instance-size-hardware-specs.md | 24 - .../version-4.1/harperdb-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 32 - .../harperdb-studio/create-account.md | 27 - .../harperdb-studio/enable-mixed-content.md | 9 - .../version-4.1/harperdb-studio/index.md | 19 - .../harperdb-studio/instance-configuration.md | 106 - .../harperdb-studio/instance-example-code.md | 60 - .../harperdb-studio/instance-metrics.md | 19 - .../version-4.1/harperdb-studio/instances.md | 151 -- .../harperdb-studio/login-password-reset.md | 42 - .../harperdb-studio/manage-charts.md | 78 
- .../harperdb-studio/manage-clustering.md | 93 - .../harperdb-studio/manage-functions.md | 158 -- .../harperdb-studio/manage-instance-roles.md | 75 - .../harperdb-studio/manage-instance-users.md | 53 - .../manage-schemas-browse-data.md | 123 -- .../harperdb-studio/organizations.md | 109 - .../harperdb-studio/query-instance-data.md | 52 - .../version-4.1/harperdb-studio/resources.md | 39 - versioned_docs/version-4.1/index.md | 17 - .../version-4.1/install-harperdb/index.md | 61 - .../version-4.1/install-harperdb/linux.md | 211 -- versioned_docs/version-4.1/jobs.md | 112 -- versioned_docs/version-4.1/logging.md | 69 - .../version-4.1/reference/content-types.md | 27 - .../version-4.1/reference/data-types.md | 43 - .../version-4.1/reference/dynamic-schema.md | 148 -- .../version-4.1/reference/headers.md | 13 - versioned_docs/version-4.1/reference/index.md | 9 - .../version-4.1/reference/limits.md | 33 - .../reference/storage-algorithm.md | 27 - .../version-4.1/security/basic-auth.md | 57 - .../security/certificate-management.md | 61 - .../version-4.1/security/configuration.md | 42 - versioned_docs/version-4.1/security/index.md | 12 - .../version-4.1/security/jwt-auth.md | 98 - .../version-4.1/security/users-and-roles.md | 269 --- .../version-4.1/sql-guide/date-functions.md | 223 --- .../version-4.1/sql-guide/delete.md | 12 - .../version-4.1/sql-guide/features-matrix.md | 82 - .../version-4.1/sql-guide/functions.md | 141 -- versioned_docs/version-4.1/sql-guide/index.md | 9 - .../version-4.1/sql-guide/insert.md | 12 - versioned_docs/version-4.1/sql-guide/joins.md | 22 - .../version-4.1/sql-guide/json-search.md | 173 -- .../version-4.1/sql-guide/reserved-word.md | 203 -- .../version-4.1/sql-guide/select.md | 29 - .../sql-geospatial-functions/geoarea.md | 45 - .../sql-geospatial-functions/geocontains.md | 70 - .../sql-geospatial-functions/geoconvert.md | 33 - .../sql-geospatial-functions/geocrosses.md | 48 - .../sql-geospatial-functions/geodifference.md | 60 - 
.../sql-geospatial-functions/geodistance.md | 37 - .../sql-geospatial-functions/geoequal.md | 45 - .../sql-geospatial-functions/geolength.md | 47 - .../sql-geospatial-functions/geonear.md | 41 - .../sql-geospatial-functions/index.md | 15 - .../version-4.1/sql-guide/update.md | 13 - versioned_docs/version-4.1/support.md | 84 - .../version-4.1/transaction-logging.md | 87 - .../version-4.1/upgrade-hdb-instance.md | 91 - .../administration/_category_.json | 10 - .../administration/administration.md | 23 - .../version-4.2/administration/cloning.md | 156 -- .../harperdb-studio/create-account.md | 27 - .../harperdb-studio/enable-mixed-content.md | 9 - .../administration/harperdb-studio/index.md | 19 - .../harperdb-studio/instance-configuration.md | 106 - .../harperdb-studio/instance-example-code.md | 60 - .../harperdb-studio/instance-metrics.md | 16 - .../harperdb-studio/instances.md | 146 -- .../harperdb-studio/login-password-reset.md | 42 - .../harperdb-studio/manage-charts.md | 71 - .../harperdb-studio/manage-clustering.md | 93 - .../harperdb-studio/manage-functions.md | 158 -- .../harperdb-studio/manage-instance-roles.md | 75 - .../harperdb-studio/manage-instance-users.md | 53 - .../manage-schemas-browse-data.md | 123 -- .../harperdb-studio/organizations.md | 109 - .../harperdb-studio/query-instance-data.md | 52 - .../version-4.2/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.2/deployments/_category_.json | 10 - .../version-4.2/deployments/configuration.md | 745 ------- .../version-4.2/deployments/harperdb-cli.md | 96 - .../deployments/harperdb-cloud/alarms.md | 20 - .../deployments/harperdb-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harperdb-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harperdb/index.md | 
63 - .../deployments/install-harperdb/linux.md | 212 -- .../deployments/upgrade-hdb-instance.md | 91 - .../version-4.2/developers/_category_.json | 10 - .../developers/applications/caching.md | 274 --- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../applications/defining-schemas.md | 103 - .../applications/example-projects.md | 37 - .../developers/applications/index.md | 376 ---- .../developers/components/drivers.md | 10 - .../components/google-data-studio.md | 37 - .../developers/components/index.md | 38 - .../developers/components/installing.md | 79 - .../developers/components/operations.md | 45 - .../version-4.2/developers/components/sdks.md | 21 - .../components/writing-extensions.md | 153 -- .../advanced-json-sql-examples.md | 1775 ----------------- .../operations-api/bulk-operations.md | 148 -- .../developers/operations-api/clustering.md | 413 ---- .../developers/operations-api/components.md | 314 --- .../operations-api/custom-functions.md | 277 --- .../operations-api/databases-and-tables.md | 386 ---- .../developers/operations-api/index.md | 51 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 732 ------- .../operations-api/nosql-operations.md | 360 ---- .../operations-api/quickstart-examples.md | 368 ---- .../developers/operations-api/registration.md | 70 - .../operations-api/sql-operations.md | 123 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../developers/operations-api/utilities.md | 376 ---- .../version-4.2/developers/real-time.md | 160 -- versioned_docs/version-4.2/developers/rest.md | 200 -- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 62 - .../developers/security/configuration.md | 40 - .../version-4.2/developers/security/index.md | 12 - .../developers/security/jwt-auth.md | 96 - .../developers/security/users-and-roles.md | 272 --- 
versioned_docs/version-4.2/getting-started.md | 84 - versioned_docs/version-4.2/index.md | 106 - .../version-4.2/reference/_category_.json | 10 - .../version-4.2/reference/analytics.md | 117 -- .../version-4.2/reference/architecture.md | 42 - .../clustering/certificate-management.md | 70 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.2/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 168 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../version-4.2/reference/content-types.md | 27 - .../version-4.2/reference/data-types.md | 45 - .../version-4.2/reference/dynamic-schema.md | 148 -- .../version-4.2/reference/globals.md | 80 - .../version-4.2/reference/headers.md | 12 - versioned_docs/version-4.2/reference/index.md | 9 - .../version-4.2/reference/limits.md | 33 - .../version-4.2/reference/resource.md | 538 ----- .../reference/sql-guide/date-functions.md | 223 --- .../reference/sql-guide/features-matrix.md | 82 - .../reference/sql-guide/functions.md | 141 -- .../version-4.2/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 173 -- .../reference/sql-guide/reserved-word.md | 203 -- .../sql-guide/sql-geospatial-functions.md | 415 ---- .../reference/storage-algorithm.md | 27 - .../version-4.2/reference/transactions.md | 40 - .../administration/_category_.json | 10 - .../administration/administration.md | 32 - .../version-4.3/administration/cloning.md | 174 -- .../version-4.3/administration/compact.md | 67 - .../harperdb-studio/create-account.md | 27 - .../harperdb-studio/enable-mixed-content.md | 9 - .../administration/harperdb-studio/index.md | 21 - .../harperdb-studio/instance-configuration.md | 112 -- .../harperdb-studio/instance-metrics.md | 16 - 
.../harperdb-studio/instances.md | 146 -- .../harperdb-studio/login-password-reset.md | 42 - .../harperdb-studio/manage-applications.md | 62 - .../harperdb-studio/manage-charts.md | 71 - .../manage-databases-browse-data.md | 123 -- .../harperdb-studio/manage-instance-roles.md | 77 - .../harperdb-studio/manage-instance-users.md | 53 - .../harperdb-studio/manage-replication.md | 90 - .../harperdb-studio/organizations.md | 109 - .../harperdb-studio/query-instance-data.md | 52 - .../version-4.3/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.3/deployments/_category_.json | 10 - .../version-4.3/deployments/configuration.md | 973 --------- .../version-4.3/deployments/harperdb-cli.md | 168 -- .../deployments/harperdb-cloud/alarms.md | 20 - .../deployments/harperdb-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harperdb-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harperdb/index.md | 61 - .../deployments/install-harperdb/linux.md | 225 --- .../deployments/upgrade-hdb-instance.md | 91 - .../version-4.3/developers/_category_.json | 10 - .../developers/applications/caching.md | 292 --- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../applications/defining-schemas.md | 168 -- .../applications/example-projects.md | 37 - .../developers/applications/index.md | 376 ---- .../developers/components/drivers.md | 10 - .../components/google-data-studio.md | 37 - .../developers/components/index.md | 38 - .../developers/components/installing.md | 79 - .../developers/components/operations.md | 45 - .../version-4.3/developers/components/sdks.md | 21 - .../components/writing-extensions.md | 175 -- .../advanced-json-sql-examples.md | 1775 ----------------- 
.../operations-api/bulk-operations.md | 148 -- .../developers/operations-api/clustering.md | 486 ----- .../developers/operations-api/components.md | 314 --- .../operations-api/custom-functions.md | 277 --- .../operations-api/databases-and-tables.md | 386 ---- .../developers/operations-api/index.md | 51 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 732 ------- .../operations-api/nosql-operations.md | 384 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 70 - .../operations-api/sql-operations.md | 127 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../developers/operations-api/utilities.md | 377 ---- .../version-4.3/developers/real-time.md | 162 -- versioned_docs/version-4.3/developers/rest.md | 391 ---- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 62 - .../developers/security/configuration.md | 40 - .../version-4.3/developers/security/index.md | 13 - .../developers/security/jwt-auth.md | 96 - .../developers/security/mtls-auth.md | 7 - .../developers/security/users-and-roles.md | 272 --- versioned_docs/version-4.3/getting-started.md | 84 - versioned_docs/version-4.3/index.md | 106 - .../version-4.3/reference/_category_.json | 10 - .../version-4.3/reference/analytics.md | 117 -- .../version-4.3/reference/architecture.md | 42 - .../clustering/certificate-management.md | 70 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.3/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 168 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../version-4.3/reference/content-types.md | 29 - 
.../version-4.3/reference/data-types.md | 52 - .../version-4.3/reference/dynamic-schema.md | 148 -- .../version-4.3/reference/globals.md | 239 --- .../version-4.3/reference/headers.md | 12 - versioned_docs/version-4.3/reference/index.md | 9 - .../version-4.3/reference/limits.md | 33 - .../version-4.3/reference/resource.md | 697 ------- .../reference/sql-guide/date-functions.md | 227 --- .../reference/sql-guide/features-matrix.md | 86 - .../reference/sql-guide/functions.md | 145 -- .../version-4.3/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 177 -- .../reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- .../reference/storage-algorithm.md | 27 - .../version-4.3/reference/transactions.md | 40 - .../administration/_category_.json | 10 - .../administration/administration.md | 28 - .../version-4.4/administration/cloning.md | 153 -- .../version-4.4/administration/compact.md | 60 - .../harper-studio/create-account.md | 27 - .../harper-studio/enable-mixed-content.md | 9 - .../administration/harper-studio/index.md | 21 - .../harper-studio/instance-configuration.md | 112 -- .../harper-studio/instance-metrics.md | 16 - .../administration/harper-studio/instances.md | 146 -- .../harper-studio/login-password-reset.md | 42 - .../harper-studio/manage-applications.md | 59 - .../manage-databases-browse-data.md | 123 -- .../harper-studio/manage-instance-roles.md | 77 - .../harper-studio/manage-instance-users.md | 53 - .../harper-studio/manage-replication.md | 90 - .../harper-studio/organizations.md | 109 - .../version-4.4/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.4/deployments/_category_.json | 10 - .../version-4.4/deployments/configuration.md | 1073 ---------- .../version-4.4/deployments/harper-cli.md | 194 -- 
.../deployments/harper-cloud/alarms.md | 20 - .../deployments/harper-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harper-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harper/index.md | 61 - .../deployments/install-harper/linux.md | 225 --- .../deployments/upgrade-hdb-instance.md | 140 -- .../version-4.4/developers/_category_.json | 10 - .../developers/applications/caching.md | 292 --- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../developers/applications/defining-roles.md | 85 - .../applications/defining-schemas.md | 219 -- .../developers/applications/index.md | 378 ---- .../applications/web-applications.md | 63 - .../developers/components/built-in.md | 116 -- .../developers/components/index.md | 25 - .../developers/components/managing.md | 180 -- .../developers/components/reference.md | 254 --- .../advanced-json-sql-examples.md | 1775 ----------------- .../operations-api/bulk-operations.md | 148 -- .../operations-api/clustering-nats.md | 486 ----- .../developers/operations-api/clustering.md | 345 ---- .../developers/operations-api/components.md | 511 ----- .../operations-api/custom-functions.md | 277 --- .../operations-api/databases-and-tables.md | 388 ---- .../developers/operations-api/index.md | 52 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 732 ------- .../operations-api/nosql-operations.md | 384 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 70 - .../operations-api/sql-operations.md | 127 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../developers/operations-api/utilities.md | 463 ----- .../version-4.4/developers/real-time.md | 180 -- .../developers/replication/index.md | 256 --- .../developers/replication/sharding.md | 123 -- 
versioned_docs/version-4.4/developers/rest.md | 403 ---- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 74 - .../developers/security/configuration.md | 40 - .../version-4.4/developers/security/index.md | 13 - .../developers/security/jwt-auth.md | 96 - .../developers/security/mtls-auth.md | 7 - .../developers/security/users-and-roles.md | 272 --- versioned_docs/version-4.4/index.md | 157 -- .../version-4.4/reference/_category_.json | 10 - .../version-4.4/reference/analytics.md | 117 -- .../version-4.4/reference/architecture.md | 42 - .../clustering/certificate-management.md | 70 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.4/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 199 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../version-4.4/reference/content-types.md | 35 - .../version-4.4/reference/data-types.md | 52 - .../version-4.4/reference/dynamic-schema.md | 148 -- .../version-4.4/reference/globals.md | 313 --- .../version-4.4/reference/graphql.md | 254 --- .../version-4.4/reference/headers.md | 12 - versioned_docs/version-4.4/reference/index.md | 9 - .../version-4.4/reference/limits.md | 37 - .../reference/query-optimization.md | 37 - .../version-4.4/reference/resource.md | 726 ------- versioned_docs/version-4.4/reference/roles.md | 117 -- .../reference/sql-guide/date-functions.md | 227 --- .../reference/sql-guide/features-matrix.md | 88 - .../reference/sql-guide/functions.md | 145 -- .../version-4.4/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 177 -- .../reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- .../reference/storage-algorithm.md | 27 - 
.../version-4.4/reference/transactions.md | 40 - .../administration/_category_.json | 10 - .../administration/administration.md | 32 - .../version-4.5/administration/cloning.md | 153 -- .../version-4.5/administration/compact.md | 60 - .../harper-studio/create-account.md | 27 - .../harper-studio/enable-mixed-content.md | 9 - .../administration/harper-studio/index.md | 21 - .../harper-studio/instance-configuration.md | 112 -- .../harper-studio/instance-metrics.md | 16 - .../administration/harper-studio/instances.md | 146 -- .../harper-studio/login-password-reset.md | 42 - .../harper-studio/manage-applications.md | 59 - .../manage-databases-browse-data.md | 123 -- .../harper-studio/manage-instance-roles.md | 77 - .../harper-studio/manage-instance-users.md | 53 - .../harper-studio/manage-replication.md | 90 - .../harper-studio/organizations.md | 109 - .../harper-studio/query-instance-data.md | 52 - .../version-4.5/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.5/deployments/_category_.json | 10 - .../version-4.5/deployments/configuration.md | 1150 ----------- .../version-4.5/deployments/harper-cli.md | 194 -- .../deployments/harper-cloud/alarms.md | 20 - .../deployments/harper-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harper-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harper/index.md | 61 - .../deployments/install-harper/linux.md | 225 --- .../deployments/upgrade-hdb-instance.md | 140 -- .../version-4.5/developers/_category_.json | 10 - .../developers/applications/caching.md | 325 --- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../developers/applications/defining-roles.md | 85 - .../applications/defining-schemas.md | 222 --- 
.../developers/applications/index.md | 168 -- .../applications/web-applications.md | 63 - .../developers/components/built-in.md | 153 -- .../developers/components/index.md | 26 - .../developers/components/managing.md | 180 -- .../developers/components/reference.md | 254 --- .../advanced-json-sql-examples.md | 1775 ----------------- .../operations-api/bulk-operations.md | 148 -- .../operations-api/clustering-nats.md | 486 ----- .../developers/operations-api/clustering.md | 355 ---- .../developers/operations-api/components.md | 512 ----- .../operations-api/custom-functions.md | 281 --- .../operations-api/databases-and-tables.md | 388 ---- .../developers/operations-api/index.md | 52 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 732 ------- .../operations-api/nosql-operations.md | 384 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 70 - .../operations-api/sql-operations.md | 127 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../developers/operations-api/utilities.md | 463 ----- .../version-4.5/developers/real-time.md | 180 -- .../developers/replication/index.md | 280 --- .../developers/replication/sharding.md | 167 -- versioned_docs/version-4.5/developers/rest.md | 403 ---- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 74 - .../developers/security/configuration.md | 40 - .../version-4.5/developers/security/index.md | 13 - .../developers/security/jwt-auth.md | 96 - .../developers/security/mtls-auth.md | 7 - .../developers/security/users-and-roles.md | 272 --- versioned_docs/version-4.5/index.mdx | 53 - .../version-4.5/reference/_category_.json | 6 - .../version-4.5/reference/analytics.md | 117 -- .../version-4.5/reference/architecture.md | 42 - versioned_docs/version-4.5/reference/blob.md | 146 -- .../clustering/certificate-management.md | 70 - 
.../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.5/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 199 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../version-4.5/reference/content-types.md | 35 - .../version-4.5/reference/data-types.md | 60 - .../version-4.5/reference/dynamic-schema.md | 148 -- .../version-4.5/reference/globals.md | 399 ---- .../version-4.5/reference/graphql.md | 254 --- .../version-4.5/reference/headers.md | 12 - versioned_docs/version-4.5/reference/index.md | 9 - .../version-4.5/reference/limits.md | 37 - .../reference/query-optimization.md | 37 - .../version-4.5/reference/resource.md | 797 -------- versioned_docs/version-4.5/reference/roles.md | 117 -- .../reference/sql-guide/date-functions.md | 227 --- .../reference/sql-guide/features-matrix.md | 88 - .../reference/sql-guide/functions.md | 145 -- .../version-4.5/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 177 -- .../reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- .../reference/storage-algorithm.md | 27 - .../version-4.5/reference/transactions.md | 40 - .../administration/_category_.json | 10 - .../administration/administration.md | 32 - .../version-4.6/administration/cloning.md | 153 -- .../version-4.6/administration/compact.md | 60 - .../harper-studio/create-account.md | 27 - .../harper-studio/enable-mixed-content.md | 27 - .../administration/harper-studio/index.md | 21 - .../harper-studio/instance-configuration.md | 108 - .../harper-studio/instance-metrics.md | 16 - .../administration/harper-studio/instances.md | 146 -- .../harper-studio/login-password-reset.md | 42 - .../harper-studio/manage-applications.md | 59 - 
.../manage-databases-browse-data.md | 123 -- .../harper-studio/manage-instance-roles.md | 77 - .../harper-studio/manage-instance-users.md | 53 - .../harper-studio/manage-replication.md | 90 - .../harper-studio/organizations.md | 109 - .../harper-studio/query-instance-data.md | 52 - .../version-4.6/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.6/deployments/_category_.json | 10 - .../version-4.6/deployments/configuration.md | 1235 ------------ .../version-4.6/deployments/harper-cli.md | 194 -- .../deployments/harper-cloud/alarms.md | 20 - .../deployments/harper-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harper-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harper/index.md | 61 - .../deployments/install-harper/linux.md | 225 --- .../deployments/upgrade-hdb-instance.md | 140 -- .../version-4.6/developers/_category_.json | 10 - .../developers/applications/caching.md | 325 --- .../developers/applications/data-loader.md | 195 -- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../developers/applications/defining-roles.md | 85 - .../applications/defining-schemas.md | 272 --- .../developers/applications/index.md | 237 --- .../applications/web-applications.md | 63 - .../advanced-json-sql-examples.md | 1775 ----------------- .../developers/operations-api/analytics.md | 121 -- .../operations-api/bulk-operations.md | 255 --- .../operations-api/certificate-management.md | 124 -- .../operations-api/clustering-nats.md | 486 ----- .../developers/operations-api/clustering.md | 355 ---- .../developers/operations-api/components.md | 546 ----- .../operations-api/configuration.md | 135 -- .../operations-api/custom-functions.md | 281 --- 
.../operations-api/databases-and-tables.md | 388 ---- .../developers/operations-api/index.md | 55 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 732 ------- .../operations-api/nosql-operations.md | 389 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 70 - .../operations-api/sql-operations.md | 127 -- .../operations-api/system-operations.md | 195 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../version-4.6/developers/real-time.md | 180 -- .../developers/replication/index.md | 280 --- .../developers/replication/sharding.md | 167 -- versioned_docs/version-4.6/developers/rest.md | 403 ---- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 74 - .../developers/security/configuration.md | 40 - .../version-4.6/developers/security/index.md | 13 - .../developers/security/jwt-auth.md | 96 - .../developers/security/mtls-auth.md | 7 - .../developers/security/users-and-roles.md | 272 --- versioned_docs/version-4.6/index.mdx | 55 - .../version-4.6/reference/_category_.json | 6 - .../version-4.6/reference/analytics.md | 173 -- .../version-4.6/reference/architecture.md | 42 - versioned_docs/version-4.6/reference/blob.md | 146 -- .../clustering/certificate-management.md | 70 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.6/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 199 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../reference/components/applications.md | 184 -- .../components/built-in-extensions.md | 188 -- .../reference/components/configuration.md | 89 - .../reference/components/extensions.md | 187 -- 
.../version-4.6/reference/components/index.md | 39 - .../reference/components/plugins.md | 619 ------ .../version-4.6/reference/content-types.md | 35 - .../version-4.6/reference/data-types.md | 60 - .../version-4.6/reference/dynamic-schema.md | 148 -- .../version-4.6/reference/globals.md | 424 ---- .../version-4.6/reference/graphql.md | 254 --- .../version-4.6/reference/headers.md | 12 - versioned_docs/version-4.6/reference/index.md | 9 - .../version-4.6/reference/limits.md | 37 - .../version-4.6/reference/resources/index.md | 796 -------- .../reference/resources/instance-binding.md | 721 ------- .../reference/resources/migration.md | 137 -- .../reference/resources/query-optimization.md | 37 - versioned_docs/version-4.6/reference/roles.md | 117 -- .../reference/sql-guide/date-functions.md | 227 --- .../reference/sql-guide/features-matrix.md | 88 - .../reference/sql-guide/functions.md | 145 -- .../version-4.6/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 177 -- .../reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- .../reference/storage-algorithm.md | 27 - .../version-4.6/reference/transactions.md | 40 - .../administration/_category_.json | 10 - .../administration/administration.md | 32 - .../version-4.7/administration/cloning.md | 153 -- .../version-4.7/administration/compact.md | 60 - .../harper-studio/create-account.md | 27 - .../harper-studio/enable-mixed-content.md | 9 - .../administration/harper-studio/index.md | 21 - .../harper-studio/instance-configuration.md | 108 - .../harper-studio/instance-metrics.md | 16 - .../administration/harper-studio/instances.md | 146 -- .../harper-studio/login-password-reset.md | 42 - .../harper-studio/manage-applications.md | 59 - .../manage-databases-browse-data.md | 123 -- .../harper-studio/manage-instance-roles.md | 77 - .../harper-studio/manage-instance-users.md | 53 - .../harper-studio/manage-replication.md | 90 - 
.../harper-studio/organizations.md | 109 - .../harper-studio/query-instance-data.md | 52 - .../version-4.7/administration/jobs.md | 112 -- .../administration/logging/audit-logging.md | 126 -- .../administration/logging/index.md | 11 - .../logging/standard-logging.md | 65 - .../logging/transaction-logging.md | 87 - .../version-4.7/deployments/_category_.json | 10 - .../version-4.7/deployments/configuration.md | 1556 --------------- .../version-4.7/deployments/harper-cli.md | 194 -- .../deployments/harper-cloud/alarms.md | 20 - .../deployments/harper-cloud/index.md | 9 - .../instance-size-hardware-specs.md | 23 - .../deployments/harper-cloud/iops-impact.md | 45 - .../verizon-5g-wavelength-instances.md | 31 - .../deployments/install-harper/index.md | 94 - .../deployments/install-harper/linux.md | 225 --- .../deployments/upgrade-hdb-instance.md | 140 -- .../version-4.7/developers/_category_.json | 10 - .../developers/applications/caching.md | 325 --- .../developers/applications/data-loader.md | 195 -- .../developers/applications/debugging.md | 39 - .../developers/applications/define-routes.md | 119 -- .../developers/applications/defining-roles.md | 85 - .../applications/defining-schemas.md | 272 --- .../developers/applications/index.md | 237 --- .../applications/web-applications.md | 63 - .../advanced-json-sql-examples.md | 1775 ----------------- .../developers/operations-api/analytics.md | 121 -- .../operations-api/bulk-operations.md | 255 --- .../operations-api/certificate-management.md | 124 -- .../operations-api/clustering-nats.md | 486 ----- .../developers/operations-api/clustering.md | 355 ---- .../developers/operations-api/components.md | 553 ----- .../operations-api/configuration.md | 135 -- .../operations-api/custom-functions.md | 281 --- .../operations-api/databases-and-tables.md | 388 ---- .../developers/operations-api/index.md | 55 - .../developers/operations-api/jobs.md | 87 - .../developers/operations-api/logs.md | 733 ------- 
.../operations-api/nosql-operations.md | 389 ---- .../operations-api/quickstart-examples.md | 370 ---- .../developers/operations-api/registration.md | 231 --- .../operations-api/sql-operations.md | 127 -- .../operations-api/system-operations.md | 195 -- .../operations-api/token-authentication.md | 60 - .../operations-api/users-and-roles.md | 508 ----- .../version-4.7/developers/real-time.md | 180 -- .../developers/replication/index.md | 300 --- .../developers/replication/sharding.md | 167 -- versioned_docs/version-4.7/developers/rest.md | 403 ---- .../developers/security/basic-auth.md | 57 - .../security/certificate-management.md | 153 -- .../security/certificate-verification.md | 502 ----- .../developers/security/configuration.md | 40 - .../version-4.7/developers/security/index.md | 23 - .../developers/security/jwt-auth.md | 96 - .../developers/security/mtls-auth.md | 24 - .../developers/security/users-and-roles.md | 273 --- versioned_docs/version-4.7/index.mdx | 55 - .../version-4.7/reference/_category_.json | 6 - .../version-4.7/reference/analytics.md | 173 -- .../version-4.7/reference/architecture.md | 42 - versioned_docs/version-4.7/reference/blob.md | 146 -- .../clustering/certificate-management.md | 82 - .../clustering/creating-a-cluster-user.md | 59 - .../clustering/enabling-clustering.md | 49 - .../clustering/establishing-routes.md | 73 - .../version-4.7/reference/clustering/index.md | 31 - .../clustering/managing-subscriptions.md | 199 -- .../reference/clustering/naming-a-node.md | 45 - .../requirements-and-definitions.md | 11 - .../clustering/subscription-overview.md | 45 - .../clustering/things-worth-knowing.md | 43 - .../reference/components/applications.md | 221 -- .../components/built-in-extensions.md | 319 --- .../reference/components/configuration.md | 89 - .../reference/components/extensions.md | 187 -- .../version-4.7/reference/components/index.md | 39 - .../reference/components/plugins.md | 629 ------ .../version-4.7/reference/content-types.md | 
35 - .../version-4.7/reference/data-types.md | 60 - .../version-4.7/reference/dynamic-schema.md | 148 -- .../version-4.7/reference/globals.md | 422 ---- .../version-4.7/reference/graphql.md | 254 --- .../version-4.7/reference/headers.md | 12 - versioned_docs/version-4.7/reference/index.md | 9 - .../version-4.7/reference/limits.md | 37 - .../version-4.7/reference/resources/index.md | 796 -------- .../reference/resources/instance-binding.md | 721 ------- .../reference/resources/migration.md | 137 -- .../reference/resources/query-optimization.md | 37 - versioned_docs/version-4.7/reference/roles.md | 117 -- .../reference/sql-guide/date-functions.md | 227 --- .../reference/sql-guide/features-matrix.md | 88 - .../reference/sql-guide/functions.md | 145 -- .../version-4.7/reference/sql-guide/index.md | 88 - .../reference/sql-guide/json-search.md | 177 -- .../reference/sql-guide/reserved-word.md | 207 -- .../sql-guide/sql-geospatial-functions.md | 419 ---- .../reference/storage-algorithm.md | 27 - .../version-4.7/reference/transactions.md | 40 - 872 files changed, 135292 deletions(-) delete mode 100644 docs/administration/_category_.json delete mode 100644 docs/administration/administration.md delete mode 100644 docs/administration/cloning.md delete mode 100644 docs/administration/compact.md delete mode 100644 docs/administration/harper-studio/create-account.md delete mode 100644 docs/administration/harper-studio/enable-mixed-content.md delete mode 100644 docs/administration/harper-studio/index.md delete mode 100644 docs/administration/harper-studio/instance-configuration.md delete mode 100644 docs/administration/harper-studio/instance-metrics.md delete mode 100644 docs/administration/harper-studio/instances.md delete mode 100644 docs/administration/harper-studio/login-password-reset.md delete mode 100644 docs/administration/harper-studio/manage-applications.md delete mode 100644 docs/administration/harper-studio/manage-databases-browse-data.md delete mode 100644 
docs/administration/harper-studio/manage-instance-roles.md delete mode 100644 docs/administration/harper-studio/manage-instance-users.md delete mode 100644 docs/administration/harper-studio/manage-replication.md delete mode 100644 docs/administration/harper-studio/organizations.md delete mode 100644 docs/administration/harper-studio/query-instance-data.md delete mode 100644 docs/administration/jobs.md delete mode 100644 docs/administration/logging/audit-logging.md delete mode 100644 docs/administration/logging/index.md delete mode 100644 docs/administration/logging/standard-logging.md delete mode 100644 docs/administration/logging/transaction-logging.md delete mode 100644 docs/deployments/_category_.json delete mode 100644 docs/deployments/configuration.md delete mode 100644 docs/deployments/harper-cli.md delete mode 100644 docs/deployments/harper-cloud/alarms.md delete mode 100644 docs/deployments/harper-cloud/index.md delete mode 100644 docs/deployments/harper-cloud/instance-size-hardware-specs.md delete mode 100644 docs/deployments/harper-cloud/iops-impact.md delete mode 100644 docs/deployments/harper-cloud/verizon-5g-wavelength-instances.md delete mode 100644 docs/deployments/install-harper/index.md delete mode 100644 docs/deployments/install-harper/linux.md delete mode 100644 docs/deployments/upgrade-hdb-instance.md delete mode 100644 docs/developers/_category_.json delete mode 100644 docs/developers/applications/caching.md delete mode 100644 docs/developers/applications/data-loader.md delete mode 100644 docs/developers/applications/debugging.md delete mode 100644 docs/developers/applications/define-routes.md delete mode 100644 docs/developers/applications/defining-roles.md delete mode 100644 docs/developers/applications/defining-schemas.md delete mode 100644 docs/developers/applications/index.md delete mode 100644 docs/developers/applications/web-applications.md delete mode 100644 docs/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 
docs/developers/operations-api/analytics.md delete mode 100644 docs/developers/operations-api/bulk-operations.md delete mode 100644 docs/developers/operations-api/certificate-management.md delete mode 100644 docs/developers/operations-api/clustering-nats.md delete mode 100644 docs/developers/operations-api/clustering.md delete mode 100644 docs/developers/operations-api/components.md delete mode 100644 docs/developers/operations-api/configuration.md delete mode 100644 docs/developers/operations-api/custom-functions.md delete mode 100644 docs/developers/operations-api/databases-and-tables.md delete mode 100644 docs/developers/operations-api/index.md delete mode 100644 docs/developers/operations-api/jobs.md delete mode 100644 docs/developers/operations-api/logs.md delete mode 100644 docs/developers/operations-api/nosql-operations.md delete mode 100644 docs/developers/operations-api/quickstart-examples.md delete mode 100644 docs/developers/operations-api/registration.md delete mode 100644 docs/developers/operations-api/sql-operations.md delete mode 100644 docs/developers/operations-api/system-operations.md delete mode 100644 docs/developers/operations-api/token-authentication.md delete mode 100644 docs/developers/operations-api/users-and-roles.md delete mode 100644 docs/developers/real-time.md delete mode 100644 docs/developers/replication/index.md delete mode 100644 docs/developers/replication/sharding.md delete mode 100644 docs/developers/rest.md delete mode 100644 docs/developers/security/basic-auth.md delete mode 100644 docs/developers/security/certificate-management.md delete mode 100644 docs/developers/security/certificate-verification.md delete mode 100644 docs/developers/security/configuration.md delete mode 100644 docs/developers/security/index.md delete mode 100644 docs/developers/security/jwt-auth.md delete mode 100644 docs/developers/security/mtls-auth.md delete mode 100644 docs/developers/security/users-and-roles.md delete mode 100644 docs/index.mdx delete 
mode 100644 docs/reference/_category_.json delete mode 100644 docs/reference/analytics.md delete mode 100644 docs/reference/architecture.md delete mode 100644 docs/reference/blob.md delete mode 100644 docs/reference/clustering/certificate-management.md delete mode 100644 docs/reference/clustering/creating-a-cluster-user.md delete mode 100644 docs/reference/clustering/enabling-clustering.md delete mode 100644 docs/reference/clustering/establishing-routes.md delete mode 100644 docs/reference/clustering/index.md delete mode 100644 docs/reference/clustering/managing-subscriptions.md delete mode 100644 docs/reference/clustering/naming-a-node.md delete mode 100644 docs/reference/clustering/requirements-and-definitions.md delete mode 100644 docs/reference/clustering/subscription-overview.md delete mode 100644 docs/reference/clustering/things-worth-knowing.md delete mode 100644 docs/reference/components/applications.md delete mode 100644 docs/reference/components/built-in-extensions.md delete mode 100644 docs/reference/components/configuration.md delete mode 100644 docs/reference/components/extensions.md delete mode 100644 docs/reference/components/index.md delete mode 100644 docs/reference/components/plugins.md delete mode 100644 docs/reference/content-types.md delete mode 100644 docs/reference/data-types.md delete mode 100644 docs/reference/dynamic-schema.md delete mode 100644 docs/reference/globals.md delete mode 100644 docs/reference/graphql.md delete mode 100644 docs/reference/headers.md delete mode 100644 docs/reference/index.md delete mode 100644 docs/reference/limits.md delete mode 100644 docs/reference/resources/index.md delete mode 100644 docs/reference/resources/instance-binding.md delete mode 100644 docs/reference/resources/migration.md delete mode 100644 docs/reference/resources/query-optimization.md delete mode 100644 docs/reference/roles.md delete mode 100644 docs/reference/sql-guide/date-functions.md delete mode 100644 
docs/reference/sql-guide/features-matrix.md delete mode 100644 docs/reference/sql-guide/functions.md delete mode 100644 docs/reference/sql-guide/index.md delete mode 100644 docs/reference/sql-guide/json-search.md delete mode 100644 docs/reference/sql-guide/reserved-word.md delete mode 100644 docs/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 docs/reference/storage-algorithm.md delete mode 100644 docs/reference/transactions.md delete mode 100644 versioned_docs/version-4.1/add-ons-and-sdks/google-data-studio.md delete mode 100644 versioned_docs/version-4.1/add-ons-and-sdks/index.md delete mode 100644 versioned_docs/version-4.1/audit-logging.md delete mode 100644 versioned_docs/version-4.1/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.1/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.1/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.1/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.1/clustering/index.md delete mode 100644 versioned_docs/version-4.1/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.1/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.1/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.1/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.1/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.1/configuration.md delete mode 100644 versioned_docs/version-4.1/custom-functions/create-project.md delete mode 100644 versioned_docs/version-4.1/custom-functions/custom-functions-operations.md delete mode 100644 versioned_docs/version-4.1/custom-functions/debugging-custom-function.md delete mode 100644 versioned_docs/version-4.1/custom-functions/define-helpers.md delete mode 100644 versioned_docs/version-4.1/custom-functions/define-routes.md delete mode 100644 
versioned_docs/version-4.1/custom-functions/example-projects.md delete mode 100644 versioned_docs/version-4.1/custom-functions/host-static.md delete mode 100644 versioned_docs/version-4.1/custom-functions/index.md delete mode 100644 versioned_docs/version-4.1/custom-functions/requirements-definitions.md delete mode 100644 versioned_docs/version-4.1/custom-functions/restarting-server.md delete mode 100644 versioned_docs/version-4.1/custom-functions/templates.md delete mode 100644 versioned_docs/version-4.1/custom-functions/using-npm-git.md delete mode 100644 versioned_docs/version-4.1/getting-started/getting-started.md delete mode 100644 versioned_docs/version-4.1/harperdb-cli.md delete mode 100644 versioned_docs/version-4.1/harperdb-cloud/alarms.md delete mode 100644 versioned_docs/version-4.1/harperdb-cloud/index.md delete mode 100644 versioned_docs/version-4.1/harperdb-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.1/harperdb-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.1/harperdb-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/create-account.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/index.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/instance-example-code.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/instances.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-charts.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-clustering.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-functions.md delete 
mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/manage-schemas-browse-data.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/organizations.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.1/harperdb-studio/resources.md delete mode 100644 versioned_docs/version-4.1/index.md delete mode 100644 versioned_docs/version-4.1/install-harperdb/index.md delete mode 100644 versioned_docs/version-4.1/install-harperdb/linux.md delete mode 100644 versioned_docs/version-4.1/jobs.md delete mode 100644 versioned_docs/version-4.1/logging.md delete mode 100644 versioned_docs/version-4.1/reference/content-types.md delete mode 100644 versioned_docs/version-4.1/reference/data-types.md delete mode 100644 versioned_docs/version-4.1/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.1/reference/headers.md delete mode 100644 versioned_docs/version-4.1/reference/index.md delete mode 100644 versioned_docs/version-4.1/reference/limits.md delete mode 100644 versioned_docs/version-4.1/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.1/security/basic-auth.md delete mode 100644 versioned_docs/version-4.1/security/certificate-management.md delete mode 100644 versioned_docs/version-4.1/security/configuration.md delete mode 100644 versioned_docs/version-4.1/security/index.md delete mode 100644 versioned_docs/version-4.1/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.1/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.1/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.1/sql-guide/delete.md delete mode 100644 versioned_docs/version-4.1/sql-guide/features-matrix.md delete mode 100644 
versioned_docs/version-4.1/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.1/sql-guide/index.md delete mode 100644 versioned_docs/version-4.1/sql-guide/insert.md delete mode 100644 versioned_docs/version-4.1/sql-guide/joins.md delete mode 100644 versioned_docs/version-4.1/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.1/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.1/sql-guide/select.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoarea.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocontains.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoconvert.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocrosses.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodifference.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodistance.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoequal.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geolength.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geonear.md delete mode 100644 versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/index.md delete mode 100644 versioned_docs/version-4.1/sql-guide/update.md delete mode 100644 versioned_docs/version-4.1/support.md delete mode 100644 versioned_docs/version-4.1/transaction-logging.md delete mode 100644 versioned_docs/version-4.1/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.2/administration/_category_.json delete mode 100644 versioned_docs/version-4.2/administration/administration.md delete mode 100644 versioned_docs/version-4.2/administration/cloning.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/create-account.md delete mode 
100644 versioned_docs/version-4.2/administration/harperdb-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/index.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/instance-example-code.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/instances.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-charts.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-clustering.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-functions.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/manage-schemas-browse-data.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/organizations.md delete mode 100644 versioned_docs/version-4.2/administration/harperdb-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.2/administration/jobs.md delete mode 100644 versioned_docs/version-4.2/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.2/administration/logging/index.md delete mode 100644 versioned_docs/version-4.2/administration/logging/standard-logging.md delete mode 100644 versioned_docs/version-4.2/administration/logging/transaction-logging.md delete mode 100644 versioned_docs/version-4.2/deployments/_category_.json delete mode 100644 
versioned_docs/version-4.2/deployments/configuration.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cli.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cloud/alarms.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cloud/index.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.2/deployments/install-harperdb/index.md delete mode 100644 versioned_docs/version-4.2/deployments/install-harperdb/linux.md delete mode 100644 versioned_docs/version-4.2/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.2/developers/_category_.json delete mode 100644 versioned_docs/version-4.2/developers/applications/caching.md delete mode 100644 versioned_docs/version-4.2/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.2/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.2/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.2/developers/applications/example-projects.md delete mode 100644 versioned_docs/version-4.2/developers/applications/index.md delete mode 100644 versioned_docs/version-4.2/developers/components/drivers.md delete mode 100644 versioned_docs/version-4.2/developers/components/google-data-studio.md delete mode 100644 versioned_docs/version-4.2/developers/components/index.md delete mode 100644 versioned_docs/version-4.2/developers/components/installing.md delete mode 100644 versioned_docs/version-4.2/developers/components/operations.md delete mode 100644 versioned_docs/version-4.2/developers/components/sdks.md delete mode 100644 
versioned_docs/version-4.2/developers/components/writing-extensions.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/bulk-operations.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/quickstart-examples.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/registration.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.2/developers/operations-api/utilities.md delete mode 100644 versioned_docs/version-4.2/developers/real-time.md delete mode 100644 versioned_docs/version-4.2/developers/rest.md delete mode 100644 versioned_docs/version-4.2/developers/security/basic-auth.md delete mode 100644 versioned_docs/version-4.2/developers/security/certificate-management.md delete mode 100644 versioned_docs/version-4.2/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.2/developers/security/index.md delete mode 100644 
versioned_docs/version-4.2/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.2/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.2/getting-started.md delete mode 100644 versioned_docs/version-4.2/index.md delete mode 100644 versioned_docs/version-4.2/reference/_category_.json delete mode 100644 versioned_docs/version-4.2/reference/analytics.md delete mode 100644 versioned_docs/version-4.2/reference/architecture.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/index.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.2/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.2/reference/content-types.md delete mode 100644 versioned_docs/version-4.2/reference/data-types.md delete mode 100644 versioned_docs/version-4.2/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.2/reference/globals.md delete mode 100644 versioned_docs/version-4.2/reference/headers.md delete mode 100644 versioned_docs/version-4.2/reference/index.md delete mode 100644 versioned_docs/version-4.2/reference/limits.md delete mode 100644 versioned_docs/version-4.2/reference/resource.md delete mode 100644 
versioned_docs/version-4.2/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.2/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.2/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.2/reference/transactions.md delete mode 100644 versioned_docs/version-4.3/administration/_category_.json delete mode 100644 versioned_docs/version-4.3/administration/administration.md delete mode 100644 versioned_docs/version-4.3/administration/cloning.md delete mode 100644 versioned_docs/version-4.3/administration/compact.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/create-account.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/index.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/instances.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-applications.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-charts.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-databases-browse-data.md delete mode 
100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/manage-replication.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/organizations.md delete mode 100644 versioned_docs/version-4.3/administration/harperdb-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.3/administration/jobs.md delete mode 100644 versioned_docs/version-4.3/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.3/administration/logging/index.md delete mode 100644 versioned_docs/version-4.3/administration/logging/standard-logging.md delete mode 100644 versioned_docs/version-4.3/administration/logging/transaction-logging.md delete mode 100644 versioned_docs/version-4.3/deployments/_category_.json delete mode 100644 versioned_docs/version-4.3/deployments/configuration.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cli.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cloud/alarms.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cloud/index.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.3/deployments/install-harperdb/index.md delete mode 100644 versioned_docs/version-4.3/deployments/install-harperdb/linux.md delete mode 100644 versioned_docs/version-4.3/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.3/developers/_category_.json delete mode 100644 versioned_docs/version-4.3/developers/applications/caching.md delete 
mode 100644 versioned_docs/version-4.3/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.3/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.3/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.3/developers/applications/example-projects.md delete mode 100644 versioned_docs/version-4.3/developers/applications/index.md delete mode 100644 versioned_docs/version-4.3/developers/components/drivers.md delete mode 100644 versioned_docs/version-4.3/developers/components/google-data-studio.md delete mode 100644 versioned_docs/version-4.3/developers/components/index.md delete mode 100644 versioned_docs/version-4.3/developers/components/installing.md delete mode 100644 versioned_docs/version-4.3/developers/components/operations.md delete mode 100644 versioned_docs/version-4.3/developers/components/sdks.md delete mode 100644 versioned_docs/version-4.3/developers/components/writing-extensions.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/bulk-operations.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/quickstart-examples.md delete mode 100644 
versioned_docs/version-4.3/developers/operations-api/registration.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.3/developers/operations-api/utilities.md delete mode 100644 versioned_docs/version-4.3/developers/real-time.md delete mode 100644 versioned_docs/version-4.3/developers/rest.md delete mode 100644 versioned_docs/version-4.3/developers/security/basic-auth.md delete mode 100644 versioned_docs/version-4.3/developers/security/certificate-management.md delete mode 100644 versioned_docs/version-4.3/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.3/developers/security/index.md delete mode 100644 versioned_docs/version-4.3/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.3/developers/security/mtls-auth.md delete mode 100644 versioned_docs/version-4.3/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.3/getting-started.md delete mode 100644 versioned_docs/version-4.3/index.md delete mode 100644 versioned_docs/version-4.3/reference/_category_.json delete mode 100644 versioned_docs/version-4.3/reference/analytics.md delete mode 100644 versioned_docs/version-4.3/reference/architecture.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/index.md delete mode 100644 
versioned_docs/version-4.3/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.3/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.3/reference/content-types.md delete mode 100644 versioned_docs/version-4.3/reference/data-types.md delete mode 100644 versioned_docs/version-4.3/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.3/reference/globals.md delete mode 100644 versioned_docs/version-4.3/reference/headers.md delete mode 100644 versioned_docs/version-4.3/reference/index.md delete mode 100644 versioned_docs/version-4.3/reference/limits.md delete mode 100644 versioned_docs/version-4.3/reference/resource.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.3/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.3/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.3/reference/transactions.md delete mode 100644 versioned_docs/version-4.4/administration/_category_.json delete mode 100644 versioned_docs/version-4.4/administration/administration.md delete mode 100644 versioned_docs/version-4.4/administration/cloning.md delete mode 100644 
versioned_docs/version-4.4/administration/compact.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/create-account.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/index.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/instances.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/manage-applications.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/manage-databases-browse-data.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/manage-replication.md delete mode 100644 versioned_docs/version-4.4/administration/harper-studio/organizations.md delete mode 100644 versioned_docs/version-4.4/administration/jobs.md delete mode 100644 versioned_docs/version-4.4/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.4/administration/logging/index.md delete mode 100644 versioned_docs/version-4.4/administration/logging/standard-logging.md delete mode 100644 versioned_docs/version-4.4/administration/logging/transaction-logging.md delete mode 100644 versioned_docs/version-4.4/deployments/_category_.json delete mode 100644 versioned_docs/version-4.4/deployments/configuration.md delete mode 100644 versioned_docs/version-4.4/deployments/harper-cli.md delete mode 100644 
versioned_docs/version-4.4/deployments/harper-cloud/alarms.md delete mode 100644 versioned_docs/version-4.4/deployments/harper-cloud/index.md delete mode 100644 versioned_docs/version-4.4/deployments/harper-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.4/deployments/harper-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.4/deployments/harper-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.4/deployments/install-harper/index.md delete mode 100644 versioned_docs/version-4.4/deployments/install-harper/linux.md delete mode 100644 versioned_docs/version-4.4/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.4/developers/_category_.json delete mode 100644 versioned_docs/version-4.4/developers/applications/caching.md delete mode 100644 versioned_docs/version-4.4/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.4/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.4/developers/applications/defining-roles.md delete mode 100644 versioned_docs/version-4.4/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.4/developers/applications/index.md delete mode 100644 versioned_docs/version-4.4/developers/applications/web-applications.md delete mode 100644 versioned_docs/version-4.4/developers/components/built-in.md delete mode 100644 versioned_docs/version-4.4/developers/components/index.md delete mode 100644 versioned_docs/version-4.4/developers/components/managing.md delete mode 100644 versioned_docs/version-4.4/developers/components/reference.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/bulk-operations.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/clustering-nats.md delete mode 100644 
versioned_docs/version-4.4/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/quickstart-examples.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/registration.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.4/developers/operations-api/utilities.md delete mode 100644 versioned_docs/version-4.4/developers/real-time.md delete mode 100644 versioned_docs/version-4.4/developers/replication/index.md delete mode 100644 versioned_docs/version-4.4/developers/replication/sharding.md delete mode 100644 versioned_docs/version-4.4/developers/rest.md delete mode 100644 versioned_docs/version-4.4/developers/security/basic-auth.md delete mode 100644 versioned_docs/version-4.4/developers/security/certificate-management.md delete mode 100644 versioned_docs/version-4.4/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.4/developers/security/index.md delete mode 100644 versioned_docs/version-4.4/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.4/developers/security/mtls-auth.md 
delete mode 100644 versioned_docs/version-4.4/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.4/index.md delete mode 100644 versioned_docs/version-4.4/reference/_category_.json delete mode 100644 versioned_docs/version-4.4/reference/analytics.md delete mode 100644 versioned_docs/version-4.4/reference/architecture.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/index.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.4/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.4/reference/content-types.md delete mode 100644 versioned_docs/version-4.4/reference/data-types.md delete mode 100644 versioned_docs/version-4.4/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.4/reference/globals.md delete mode 100644 versioned_docs/version-4.4/reference/graphql.md delete mode 100644 versioned_docs/version-4.4/reference/headers.md delete mode 100644 versioned_docs/version-4.4/reference/index.md delete mode 100644 versioned_docs/version-4.4/reference/limits.md delete mode 100644 versioned_docs/version-4.4/reference/query-optimization.md delete mode 100644 versioned_docs/version-4.4/reference/resource.md delete mode 100644 
versioned_docs/version-4.4/reference/roles.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.4/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.4/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.4/reference/transactions.md delete mode 100644 versioned_docs/version-4.5/administration/_category_.json delete mode 100644 versioned_docs/version-4.5/administration/administration.md delete mode 100644 versioned_docs/version-4.5/administration/cloning.md delete mode 100644 versioned_docs/version-4.5/administration/compact.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/create-account.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/index.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/instances.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/manage-applications.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/manage-databases-browse-data.md delete mode 100644 
versioned_docs/version-4.5/administration/harper-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/manage-replication.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/organizations.md delete mode 100644 versioned_docs/version-4.5/administration/harper-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.5/administration/jobs.md delete mode 100644 versioned_docs/version-4.5/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.5/administration/logging/index.md delete mode 100644 versioned_docs/version-4.5/administration/logging/standard-logging.md delete mode 100644 versioned_docs/version-4.5/administration/logging/transaction-logging.md delete mode 100644 versioned_docs/version-4.5/deployments/_category_.json delete mode 100644 versioned_docs/version-4.5/deployments/configuration.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cli.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cloud/alarms.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cloud/index.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.5/deployments/harper-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.5/deployments/install-harper/index.md delete mode 100644 versioned_docs/version-4.5/deployments/install-harper/linux.md delete mode 100644 versioned_docs/version-4.5/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.5/developers/_category_.json delete mode 100644 versioned_docs/version-4.5/developers/applications/caching.md delete mode 100644 
versioned_docs/version-4.5/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.5/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.5/developers/applications/defining-roles.md delete mode 100644 versioned_docs/version-4.5/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.5/developers/applications/index.md delete mode 100644 versioned_docs/version-4.5/developers/applications/web-applications.md delete mode 100644 versioned_docs/version-4.5/developers/components/built-in.md delete mode 100644 versioned_docs/version-4.5/developers/components/index.md delete mode 100644 versioned_docs/version-4.5/developers/components/managing.md delete mode 100644 versioned_docs/version-4.5/developers/components/reference.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/bulk-operations.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/clustering-nats.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/quickstart-examples.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/registration.md delete mode 100644 
versioned_docs/version-4.5/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.5/developers/operations-api/utilities.md delete mode 100644 versioned_docs/version-4.5/developers/real-time.md delete mode 100644 versioned_docs/version-4.5/developers/replication/index.md delete mode 100644 versioned_docs/version-4.5/developers/replication/sharding.md delete mode 100644 versioned_docs/version-4.5/developers/rest.md delete mode 100644 versioned_docs/version-4.5/developers/security/basic-auth.md delete mode 100644 versioned_docs/version-4.5/developers/security/certificate-management.md delete mode 100644 versioned_docs/version-4.5/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.5/developers/security/index.md delete mode 100644 versioned_docs/version-4.5/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.5/developers/security/mtls-auth.md delete mode 100644 versioned_docs/version-4.5/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.5/index.mdx delete mode 100644 versioned_docs/version-4.5/reference/_category_.json delete mode 100644 versioned_docs/version-4.5/reference/analytics.md delete mode 100644 versioned_docs/version-4.5/reference/architecture.md delete mode 100644 versioned_docs/version-4.5/reference/blob.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/index.md delete mode 
100644 versioned_docs/version-4.5/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.5/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.5/reference/content-types.md delete mode 100644 versioned_docs/version-4.5/reference/data-types.md delete mode 100644 versioned_docs/version-4.5/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.5/reference/globals.md delete mode 100644 versioned_docs/version-4.5/reference/graphql.md delete mode 100644 versioned_docs/version-4.5/reference/headers.md delete mode 100644 versioned_docs/version-4.5/reference/index.md delete mode 100644 versioned_docs/version-4.5/reference/limits.md delete mode 100644 versioned_docs/version-4.5/reference/query-optimization.md delete mode 100644 versioned_docs/version-4.5/reference/resource.md delete mode 100644 versioned_docs/version-4.5/reference/roles.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.5/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.5/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.5/reference/transactions.md delete mode 100644 
versioned_docs/version-4.6/administration/_category_.json delete mode 100644 versioned_docs/version-4.6/administration/administration.md delete mode 100644 versioned_docs/version-4.6/administration/cloning.md delete mode 100644 versioned_docs/version-4.6/administration/compact.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/create-account.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/index.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/instances.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/manage-applications.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/manage-databases-browse-data.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/manage-replication.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/organizations.md delete mode 100644 versioned_docs/version-4.6/administration/harper-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.6/administration/jobs.md delete mode 100644 versioned_docs/version-4.6/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.6/administration/logging/index.md delete mode 100644 versioned_docs/version-4.6/administration/logging/standard-logging.md delete mode 100644 
versioned_docs/version-4.6/administration/logging/transaction-logging.md delete mode 100644 versioned_docs/version-4.6/deployments/_category_.json delete mode 100644 versioned_docs/version-4.6/deployments/configuration.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cli.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cloud/alarms.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cloud/index.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.6/deployments/harper-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.6/deployments/install-harper/index.md delete mode 100644 versioned_docs/version-4.6/deployments/install-harper/linux.md delete mode 100644 versioned_docs/version-4.6/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.6/developers/_category_.json delete mode 100644 versioned_docs/version-4.6/developers/applications/caching.md delete mode 100644 versioned_docs/version-4.6/developers/applications/data-loader.md delete mode 100644 versioned_docs/version-4.6/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.6/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.6/developers/applications/defining-roles.md delete mode 100644 versioned_docs/version-4.6/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.6/developers/applications/index.md delete mode 100644 versioned_docs/version-4.6/developers/applications/web-applications.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/analytics.md delete mode 100644 
versioned_docs/version-4.6/developers/operations-api/bulk-operations.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/certificate-management.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/clustering-nats.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/configuration.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/quickstart-examples.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/registration.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/system-operations.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.6/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.6/developers/real-time.md delete mode 100644 versioned_docs/version-4.6/developers/replication/index.md delete mode 100644 versioned_docs/version-4.6/developers/replication/sharding.md delete mode 100644 versioned_docs/version-4.6/developers/rest.md delete mode 100644 versioned_docs/version-4.6/developers/security/basic-auth.md delete mode 100644 
versioned_docs/version-4.6/developers/security/certificate-management.md delete mode 100644 versioned_docs/version-4.6/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.6/developers/security/index.md delete mode 100644 versioned_docs/version-4.6/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.6/developers/security/mtls-auth.md delete mode 100644 versioned_docs/version-4.6/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.6/index.mdx delete mode 100644 versioned_docs/version-4.6/reference/_category_.json delete mode 100644 versioned_docs/version-4.6/reference/analytics.md delete mode 100644 versioned_docs/version-4.6/reference/architecture.md delete mode 100644 versioned_docs/version-4.6/reference/blob.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/index.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.6/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.6/reference/components/applications.md delete mode 100644 versioned_docs/version-4.6/reference/components/built-in-extensions.md delete mode 100644 versioned_docs/version-4.6/reference/components/configuration.md delete mode 100644 
versioned_docs/version-4.6/reference/components/extensions.md delete mode 100644 versioned_docs/version-4.6/reference/components/index.md delete mode 100644 versioned_docs/version-4.6/reference/components/plugins.md delete mode 100644 versioned_docs/version-4.6/reference/content-types.md delete mode 100644 versioned_docs/version-4.6/reference/data-types.md delete mode 100644 versioned_docs/version-4.6/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.6/reference/globals.md delete mode 100644 versioned_docs/version-4.6/reference/graphql.md delete mode 100644 versioned_docs/version-4.6/reference/headers.md delete mode 100644 versioned_docs/version-4.6/reference/index.md delete mode 100644 versioned_docs/version-4.6/reference/limits.md delete mode 100644 versioned_docs/version-4.6/reference/resources/index.md delete mode 100644 versioned_docs/version-4.6/reference/resources/instance-binding.md delete mode 100644 versioned_docs/version-4.6/reference/resources/migration.md delete mode 100644 versioned_docs/version-4.6/reference/resources/query-optimization.md delete mode 100644 versioned_docs/version-4.6/reference/roles.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.6/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.6/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.6/reference/transactions.md delete mode 100644 versioned_docs/version-4.7/administration/_category_.json delete mode 100644 
versioned_docs/version-4.7/administration/administration.md delete mode 100644 versioned_docs/version-4.7/administration/cloning.md delete mode 100644 versioned_docs/version-4.7/administration/compact.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/create-account.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/index.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/instances.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/manage-applications.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/manage-databases-browse-data.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/manage-replication.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/organizations.md delete mode 100644 versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md delete mode 100644 versioned_docs/version-4.7/administration/jobs.md delete mode 100644 versioned_docs/version-4.7/administration/logging/audit-logging.md delete mode 100644 versioned_docs/version-4.7/administration/logging/index.md delete mode 100644 versioned_docs/version-4.7/administration/logging/standard-logging.md delete mode 100644 versioned_docs/version-4.7/administration/logging/transaction-logging.md delete mode 100644 
versioned_docs/version-4.7/deployments/_category_.json delete mode 100644 versioned_docs/version-4.7/deployments/configuration.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cli.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cloud/alarms.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cloud/index.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md delete mode 100644 versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md delete mode 100644 versioned_docs/version-4.7/deployments/install-harper/index.md delete mode 100644 versioned_docs/version-4.7/deployments/install-harper/linux.md delete mode 100644 versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md delete mode 100644 versioned_docs/version-4.7/developers/_category_.json delete mode 100644 versioned_docs/version-4.7/developers/applications/caching.md delete mode 100644 versioned_docs/version-4.7/developers/applications/data-loader.md delete mode 100644 versioned_docs/version-4.7/developers/applications/debugging.md delete mode 100644 versioned_docs/version-4.7/developers/applications/define-routes.md delete mode 100644 versioned_docs/version-4.7/developers/applications/defining-roles.md delete mode 100644 versioned_docs/version-4.7/developers/applications/defining-schemas.md delete mode 100644 versioned_docs/version-4.7/developers/applications/index.md delete mode 100644 versioned_docs/version-4.7/developers/applications/web-applications.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/analytics.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/bulk-operations.md delete mode 100644 
versioned_docs/version-4.7/developers/operations-api/certificate-management.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/clustering-nats.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/clustering.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/components.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/configuration.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/custom-functions.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/databases-and-tables.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/index.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/jobs.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/logs.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/nosql-operations.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/registration.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/sql-operations.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/system-operations.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/token-authentication.md delete mode 100644 versioned_docs/version-4.7/developers/operations-api/users-and-roles.md delete mode 100644 versioned_docs/version-4.7/developers/real-time.md delete mode 100644 versioned_docs/version-4.7/developers/replication/index.md delete mode 100644 versioned_docs/version-4.7/developers/replication/sharding.md delete mode 100644 versioned_docs/version-4.7/developers/rest.md delete mode 100644 versioned_docs/version-4.7/developers/security/basic-auth.md delete mode 100644 versioned_docs/version-4.7/developers/security/certificate-management.md delete mode 100644 
versioned_docs/version-4.7/developers/security/certificate-verification.md delete mode 100644 versioned_docs/version-4.7/developers/security/configuration.md delete mode 100644 versioned_docs/version-4.7/developers/security/index.md delete mode 100644 versioned_docs/version-4.7/developers/security/jwt-auth.md delete mode 100644 versioned_docs/version-4.7/developers/security/mtls-auth.md delete mode 100644 versioned_docs/version-4.7/developers/security/users-and-roles.md delete mode 100644 versioned_docs/version-4.7/index.mdx delete mode 100644 versioned_docs/version-4.7/reference/_category_.json delete mode 100644 versioned_docs/version-4.7/reference/analytics.md delete mode 100644 versioned_docs/version-4.7/reference/architecture.md delete mode 100644 versioned_docs/version-4.7/reference/blob.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/certificate-management.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/creating-a-cluster-user.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/enabling-clustering.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/establishing-routes.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/index.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/managing-subscriptions.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/naming-a-node.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/requirements-and-definitions.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/subscription-overview.md delete mode 100644 versioned_docs/version-4.7/reference/clustering/things-worth-knowing.md delete mode 100644 versioned_docs/version-4.7/reference/components/applications.md delete mode 100644 versioned_docs/version-4.7/reference/components/built-in-extensions.md delete mode 100644 versioned_docs/version-4.7/reference/components/configuration.md delete mode 100644 
versioned_docs/version-4.7/reference/components/extensions.md delete mode 100644 versioned_docs/version-4.7/reference/components/index.md delete mode 100644 versioned_docs/version-4.7/reference/components/plugins.md delete mode 100644 versioned_docs/version-4.7/reference/content-types.md delete mode 100644 versioned_docs/version-4.7/reference/data-types.md delete mode 100644 versioned_docs/version-4.7/reference/dynamic-schema.md delete mode 100644 versioned_docs/version-4.7/reference/globals.md delete mode 100644 versioned_docs/version-4.7/reference/graphql.md delete mode 100644 versioned_docs/version-4.7/reference/headers.md delete mode 100644 versioned_docs/version-4.7/reference/index.md delete mode 100644 versioned_docs/version-4.7/reference/limits.md delete mode 100644 versioned_docs/version-4.7/reference/resources/index.md delete mode 100644 versioned_docs/version-4.7/reference/resources/instance-binding.md delete mode 100644 versioned_docs/version-4.7/reference/resources/migration.md delete mode 100644 versioned_docs/version-4.7/reference/resources/query-optimization.md delete mode 100644 versioned_docs/version-4.7/reference/roles.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/date-functions.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/features-matrix.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/functions.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/index.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/json-search.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/reserved-word.md delete mode 100644 versioned_docs/version-4.7/reference/sql-guide/sql-geospatial-functions.md delete mode 100644 versioned_docs/version-4.7/reference/storage-algorithm.md delete mode 100644 versioned_docs/version-4.7/reference/transactions.md diff --git a/docs/administration/_category_.json b/docs/administration/_category_.json deleted file mode 100644 
index 59c33ea4..00000000 --- a/docs/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/docs/administration/administration.md b/docs/administration/administration.md deleted file mode 100644 index 651d8922..00000000 --- a/docs/administration/administration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -Harper is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own Harper servers. - -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database Harper is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](administration/logging/audit-logging): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. 
-- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](../developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). - -### Horizontal Scaling with Node Cloning - -Harper provides rapid horizontal scaling capabilities through [node cloning functionality described here](administration/cloning). - -### Monitoring - -Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](../reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](../developers/operations-api/system-operations). -- Information about the current cluster configuration and status can be found in the [cluster APIs](../developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy visualize and monitor Harper with Graphana. - -### Replication Transaction Logging - -Harper utilizes NATS for replication, which maintains a transaction log. 
See the [transaction log documentation for information on how to query this log](administration/logging/transaction-logging). diff --git a/docs/administration/cloning.md b/docs/administration/cloning.md deleted file mode 100644 index b3698092..00000000 --- a/docs/administration/cloning.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of Harper will create a clone of that -instance's config, databases and setup full replication. If it is run in a location where there is no existing Harper install, -it will, along with cloning, install Harper. If it is run in a location where there is another Harper instance, it will -only clone config, databases and replication that do not already exist. - -Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of Harper you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. - -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `REPLICATION_HOSTNAME` - _(optional)_ The clones replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. 
-- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--REPLICATION_HOSTNAME` - _(optional)_ The clones clustering host. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. If you want to run clone again set this value to `false`. If Harper is started with the clone variables -still present and `cloned` is true, Harper will just start as normal. - -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing `harperdb-config.yaml` file. - -More can be found in the Harper config documentation [here](../deployments/configuration). - -### Excluding database and components - -To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -``` - -_Note: only include the configuration that you are using. 
If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -### Cloning system database - -Harper uses a database called `system` to store operational information. Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. - -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Replication - -If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. - -If any databases are excluded from the clone, replication will not be set up on these databases. - -### JWT Keys - -If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. - -### Cloning overtop of an existing Harper instance - -Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set Harper config before the clone is created. 
To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install Harper with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. - -### Cloning steps - -Clone node will execute the following steps when ran: - -1. Look for an existing Harper install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true` clone will skip the clone logic and start Harper. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install Harper. An instance is classed as a fresh clone if there is no system database. -1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone. -1. Clone is complete, start Harper. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. - -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e REPLICATION_HOSTNAME=1.123.45.7 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. 
diff --git a/docs/administration/compact.md b/docs/administration/compact.md deleted file mode 100644 index 1a71db14..00000000 --- a/docs/administration/compact.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Compact ---- - -# Compact - -Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. - -There are two options that Harper offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, Harper is not running when performing this operation. - -This will copy a Harper database with compaction. If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database. - -This command should be run in the [CLI](../deployments/harper-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete. Under the hood it loops through all non-system databases, creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database to where the original one is located and remove any backups. 
- -Compact on start is initiated by config in `harperdb-config.yaml` - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/docs/administration/harper-studio/create-account.md b/docs/administration/harper-studio/create-account.md deleted file mode 100644 index e1ffbb87..00000000 --- a/docs/administration/harper-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [Harper Studio sign up page](https://fabric.harper.fast/#/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -1. Review the Privacy Policy and Terms of Service. -1. Click the sign up for free button. -1. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. 
diff --git a/docs/administration/harper-studio/enable-mixed-content.md b/docs/administration/harper-studio/enable-mixed-content.md deleted file mode 100644 index 2530fef0..00000000 --- a/docs/administration/harper-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -If you want to connect insecure HTTP instances from the secure HTTPS Fabric Studio, you can enable mixed content temporarily. This isn't recommended in production systems. It would be better to add HTTPS / SSL Termination in front of your instances. But if you understand the risks, you can enable mixed content. Enabling mixed content is required in cases where you would like to connect the Harper Studio to Harper Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). diff --git a/docs/administration/harper-studio/index.md b/docs/administration/harper-studio/index.md deleted file mode 100644 index 75f4ccfb..00000000 --- a/docs/administration/harper-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Harper Studio ---- - -# Harper Studio - -Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - -Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. 
It can be enabled in the [configuration file](../../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? - -While Harper Studio is web based and hosted by us, all database interactions are performed on the Harper instance the studio is connected to. The Harper Studio loads in your browser, at which point you login to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. All database interactions are made via the Harper Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you. diff --git a/docs/administration/harper-studio/instance-configuration.md b/docs/administration/harper-studio/instance-configuration.md deleted file mode 100644 index 06a6eb89..00000000 --- a/docs/administration/harper-studio/instance-configuration.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click config in the instance control bar. 
- -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in Harper database user_ - -- Created Date (Harper Cloud only) - -- Region (Harper Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (Harper Cloud only) - -- Disk IOPS (Harper Cloud only) - -## Update Instance RAM - -Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. - -Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. 
- -_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -1. 
The instance will begin deleting immediately. - -## Restart Instance - -The Harper Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -1. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). diff --git a/docs/administration/harper-studio/instance-metrics.md b/docs/administration/harper-studio/instance-metrics.md deleted file mode 100644 index e9b48939..00000000 --- a/docs/administration/harper-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The Harper Studio displays instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance status page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance). 
- -_Note, the **status** page will only be available to super users._ diff --git a/docs/administration/harper-studio/instances.md b/docs/administration/harper-studio/instances.md deleted file mode 100644 index b367ed96..00000000 --- a/docs/administration/harper-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The Harper Studio allows you to administer all of your Harper instances in one place. Harper currently offers the following instance types: - -- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/). -- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any Harper installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets its own card. Harper Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. 
Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial Harper instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial Harper instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ _More on instance specs\_\_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._ _More on IOPS Impact on Performance\_\_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. 
- -## Register Enterprise Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Instance Password - - _The password of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Host - - _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the Harper instance. Harper defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. 
The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -Harper instances can be resized on the [Instance Configuration](instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. 
- - _The username of a Harper user that is already configured in your Harper instance._ - -1. Enter the database password. - - _The password of a Harper user that is already configured in your Harper instance._ - -1. Click **Log In**. diff --git a/docs/administration/harper-studio/login-password-reset.md b/docs/administration/harper-studio/login-password-reset.md deleted file mode 100644 index 199d38ce..00000000 --- a/docs/administration/harper-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your Harper Studio Account - -To log into your existing Harper Studio account: - -1. Navigate to the [Harper Studio](https://studio.harperdb.io/). -1. Enter your email address. -1. Enter your password. -1. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the Harper Studio password reset page. -1. Enter your email address. -1. Click **send password reset email**. -1. If the account exists, you will receive an email with a temporary password. -1. Navigate back to the Harper Studio login page. -1. Enter your email address. -1. Enter your temporary password. -1. Click **sign in**. -1. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password through the user interface. - -1. Navigate to the Harper Studio profile page. -1. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -1. Click the **Update Password** button. 
diff --git a/docs/administration/harper-studio/manage-applications.md b/docs/administration/harper-studio/manage-applications.md deleted file mode 100644 index 52e8cc64..00000000 --- a/docs/administration/harper-studio/manage-applications.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can edit individual files of your application directly in the Harper Studio. - -## Things to Keep in Mind - -To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation. 
- -When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the Harper logs, which can be found on the [status page](instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your Harper instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instances is set to by navigating to the [instance config page](instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. 
- -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost/application-template/getAll`. 
- -## Creating a New Application - -1. From the application page, click the "+ app" button at the top right. -1. Click "+ Create A New Application Using The Default Template". -1. Enter a name for your project, note project names must contain only alphanumeric characters, dashes and underscores. -1. Click OK. -1. Your project will be available in the applications file navigator on the left. Click a file to select a file to edit. - -## Editing an Application - -1. From the applications page, click the file you would like to edit from the file navigator on the left. -1. Edit the file with any changes you'd like. -1. Click "save" at the top right. Note, as mentioned above, when you save a file, the Harper Applications server will be restarted immediately. diff --git a/docs/administration/harper-studio/manage-databases-browse-data.md b/docs/administration/harper-studio/manage-databases-browse-data.md deleted file mode 100644 index 33482198..00000000 --- a/docs/administration/harper-studio/manage-databases-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Databases / Browse Data ---- - -# Manage Databases / Browse Data - -Manage instance databases/tables and browse data in tabular format with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage databases and tables, add new data, and more. - -## Manage Databases and Tables - -#### Create a Database - -1. Click the plus icon at the top right of the databases section. -1. Enter the database name. -1. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -1. 
Identify the appropriate database to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired database from the databases section. -1. Click the plus icon at the top right of the tables section. -1. Enter the table name. -1. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -1. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -1. Click the minus icon at the top right of the tables section. -1. Identify the appropriate table to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -1. This expands the search filters. -1. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -1. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 1. Click **Import From URL**. - 1. The CSV will load, and you will be redirected back to browse table data. -1. To upload a CSV file: - 1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 1. Navigate to your desired CSV file and select it. - 1. Click **Insert X Records**, where X is the number of records in your CSV. - 1. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. 
Click the plus icon at the top right of the table browser. -1. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -1. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -1. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -1. Click the **delete icon**. -1. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. - -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. 
diff --git a/docs/administration/harper-studio/manage-instance-roles.md b/docs/administration/harper-studio/manage-instance-roles.md deleted file mode 100644 index 3662013c..00000000 --- a/docs/administration/harper-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -1. Enter the role name. - -1. Click the green check mark. - -1. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -1. Configure the role permissions in the role permission editing panel. 
- - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -1. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be remove if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -1. Identify the appropriate role to delete and click the red minus sign in the same row. - -1. Click the red check mark to confirm deletion. diff --git a/docs/administration/harper-studio/manage-instance-users.md b/docs/administration/harper-studio/manage-instance-users.md deleted file mode 100644 index fb91fbbb..00000000 --- a/docs/administration/harper-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **users** in the instance control bar. 
- -_Note, the **users** page will only be available to super users._ - -## Add a User - -Harper instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](manage-instance-roles)._ - -1. Click **Add User**. - -## Edit a User - -Harper instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -1. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 1. Click **Update Password**. - -1. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 1. Click **Update Role**. - -1. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 1. Click **Delete User**. diff --git a/docs/administration/harper-studio/manage-replication.md b/docs/administration/harper-studio/manage-replication.md deleted file mode 100644 index af69f9e1..00000000 --- a/docs/administration/harper-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../reference/clustering/) first to gain a strong understanding of Harper clustering behavior. - -All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. 
Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -1. Enter Cluster Password. -1. Review and/or Set Cluster Node Name. -1. Click **Enable Clustering**. - -At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you a presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. - -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -Harper Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -1. 
Identify the instance you would like to connect from the **unconnected instances** panel. - -1. Click the plus icon next the appropriate instance. - -1. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -Harper Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -1. Click the minus icon next the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing an channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -1. For publish, click the toggle switch in the **publish** column. - -1. For subscribe, click the toggle switch in the **subscribe** column. diff --git a/docs/administration/harper-studio/organizations.md b/docs/administration/harper-studio/organizations.md deleted file mode 100644 index f93eeff0..00000000 --- a/docs/administration/harper-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique Harper Cloud subdomain. -- Cloud instances reside within an organization. 
-- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the **Create a New Organization** card. -1. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -1. Click Create Organization. - -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Identify the proper organization card and click the trash can icon. -1. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -1. Click the **Do It** button. - -## Manage Users - -Harper Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. 
Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. In the **add user** box, enter the new user’s email address. -1. Click **Add User**. - -Users may or may not already be Harper Studio users when adding them to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled owner as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Toggle the **Is Owner** switch to the desired status. - ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Type **DELETE** in the text box in the **Delete User** row. 
- - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -1. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_Harper billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -1. Click **Add Coupon**. -1. The coupon will then be available and displayed in the coupons panel. diff --git a/docs/administration/harper-studio/query-instance-data.md b/docs/administration/harper-studio/query-instance-data.md deleted file mode 100644 index e85f5e15..00000000 --- a/docs/administration/harper-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the Harper Studio with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **query** in the instance control bar. -1. Enter your SQL query in the SQL query window. -1. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. 
For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The Harper Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/docs/administration/jobs.md b/docs/administration/jobs.md deleted file mode 100644 index c487f424..00000000 --- a/docs/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -Harper Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. 
- -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/bulk-operations#delete-records-before) - -[export_local](../developers/operations-api/bulk-operations#export-local) - -[export_to_s3](../developers/operations-api/bulk-operations#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. - -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. - -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. 
- -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/docs/administration/logging/audit-logging.md b/docs/administration/logging/audit-logging.md deleted file mode 100644 index 209b4981..00000000 --- a/docs/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging - -### Audit log - -The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart Harper for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [Harper API documentation](../../developers/operations-api/logs). 
- -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. One thing of note is that the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. 
It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/docs/administration/logging/index.md b/docs/administration/logging/index.md deleted file mode 100644 index bde1870a..00000000 --- a/docs/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -Harper provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. -- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records. diff --git a/docs/administration/logging/standard-logging.md b/docs/administration/logging/standard-logging.md deleted file mode 100644 index 044c2260..00000000 --- a/docs/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the Harper application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. 
A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, increasing the log level down will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. 
The Hub server is responsible for establishing the mesh network that connects instances of Harper and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. 
- -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/docs/administration/logging/transaction-logging.md b/docs/administration/logging/transaction-logging.md deleted file mode 100644 index 99222e42..00000000 --- a/docs/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. - -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering/). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. 
- -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/docs/deployments/_category_.json b/docs/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/docs/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/docs/deployments/configuration.md b/docs/deployments/configuration.md deleted file mode 100644 index 345113fb..00000000 --- a/docs/deployments/configuration.md +++ /dev/null @@ -1,1556 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase, such as `operationsApi`. - -To change a configuration value, edit the `harperdb-config.yaml` file and save any changes. **HarperDB must be restarted for changes to take effect.** - -Alternatively, all configuration values can also be modified using environment variables, command line arguments, or the operations API via the [`set_configuration` operation](../developers/operations-api/configuration#set-configuration). - -For nested configuration elements, use underscores to represent parent-child relationships. When accessed this way, elements are case-insensitive. 
- -For example, to disable logging rotation in the `logging` section: - -```yaml -logging: - rotation: - enabled: false -``` - -You could apply this change using: - -- Environment variable: `LOGGING_ROTATION_ENABLED=false` -- Command line variable: `--LOGGING_ROTATION_ENABLED false` -- Operations API (`set_configuration`): `logging_rotation_enabled: false` - -To change the `port` in the `http` section, use: - -- Environment variable: `HTTP_PORT=` -- Command line variable: `--HTTP_PORT ` -- Operations API (`set_configuration`): `http_port: ` - -To set the `operationsApi.network.port` to `9925`, use: - -- Environment variable: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variable: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Operations API (`set_configuration`): `operationsApi_network_port: 9925` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install Harper overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - -## Environment Variable-Based Configuration - -Harper provides two special environment variables for managing configuration: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. These variables allow you to configure Harper instances through environment variables using JSON-formatted configuration objects. 
- -### Overview - -Both environment variables accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`: - -```bash -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' -export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' -``` - -The key difference between these variables is their precedence and behavior when configuration changes: - -| Feature | HARPER_DEFAULT_CONFIG | HARPER_SET_CONFIG | -| --------------- | ----------------------------- | ------------------------------- | -| **Purpose** | Provide sensible defaults | Force critical settings | -| **Precedence** | Lower (respects user edits) | Highest (always overrides) | -| **User edits** | Respected after detection | Always overridden | -| **Key removal** | Restores original values | Deletes values | -| **Use case** | Installation/runtime defaults | Security/compliance enforcement | - -### HARPER_DEFAULT_CONFIG - -`HARPER_DEFAULT_CONFIG` provides default configuration values while respecting user modifications. This is ideal for scenarios where you want to provide sensible defaults without preventing administrators from customizing their instances. 
- -#### Behavior - -**At installation time:** - -- Overrides template default values -- Respects values set by `HARPER_SET_CONFIG` -- Respects values from existing config files (when using `HDB_CONFIG`) - -**At runtime:** - -- Only updates values it originally set -- Automatically detects and respects manual user edits to the config file -- When a key is removed from the environment variable, the original value is restored - -#### Example: Setting Default Port - -```bash -# Set default port and logging level -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' - -# Install and start Harper -npm install -g harperdb -harperdb - -# The config file will have port 8080 and info logging - -# If an administrator manually edits the config to use port 9000, -# Harper will detect this change and respect it on subsequent restarts - -# If you remove http.port from the env var later: -export HARPER_DEFAULT_CONFIG='{"logging":{"level":"info"}}' -# The port will be restored to its original template default (9925) -``` - -### HARPER_SET_CONFIG - -`HARPER_SET_CONFIG` forces configuration values that must never be changed by users. This is designed for security policies, compliance requirements, or critical operational settings that need to be enforced across all instances. - -#### Behavior - -**At runtime:** - -- Always overrides all other configuration sources -- Takes precedence over user edits, file values, and `HARPER_DEFAULT_CONFIG` -- When a key is removed from the environment variable, it's deleted from the config (no restoration) - -#### Example: Enforce Security Settings - -```bash -# Force authentication and specific logging for compliance -export HARPER_SET_CONFIG='{"authentication":{"enabled":true},"logging":{"level":"error","stdStreams":true}}' - -# Install and start Harper -npm install -g harperdb -harperdb - -# Any attempt to change these values in harperdb-config.yaml will be -# overridden on the next restart. 
The SET_CONFIG values always win. - -# If you later remove authentication from SET_CONFIG: -export HARPER_SET_CONFIG='{"logging":{"level":"error","stdStreams":true}}' -# The authentication section will be removed from the config entirely -``` - -### Combining Both Variables - -You can use both environment variables together for maximum flexibility: - -```bash -# Provide sensible defaults for most settings -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080,"cors":true},"logging":{"level":"info"}}' - -# But enforce critical security settings that cannot be changed -export HARPER_SET_CONFIG='{"authentication":{"enabled":true,"sessionTokenExpiration":3600}}' -``` - -In this scenario: - -- Administrators can customize the HTTP port, CORS settings, and logging level -- Authentication settings are always enforced and cannot be changed - -### Configuration Precedence - -The complete configuration precedence order (highest to lowest): - -1. **HARPER_SET_CONFIG** - Always wins -2. **User manual edits** - Detected through drift detection -3. **HARPER_DEFAULT_CONFIG** - Applied if no user edits detected -4. **File defaults** - Original template values - -### State Tracking - -Harper maintains a state file at `{rootPath}/backup/.harper-config-state.json` to track the source of each configuration value. 
This enables: - -- **Drift detection**: Identifying when users manually edit values set by `HARPER_DEFAULT_CONFIG` -- **Restoration**: Restoring original values when keys are removed from `HARPER_DEFAULT_CONFIG` -- **Conflict resolution**: Determining which source should take precedence - -### Important Notes - -- Both environment variables must contain valid JSON matching the structure of `harperdb-config.yaml` -- Configuration validation occurs after environment variables are applied -- Invalid values will be caught by Harper's configuration validator -- Changes to these environment variables require a Harper restart to take effect -- The state file is specific to each Harper instance (stored in the root path) - -### Format Reference - -The JSON structure mirrors the YAML configuration file. For example: - -**YAML format:** - -```yaml -http: - port: 8080 - cors: true -logging: - level: info - rotation: - enabled: true -``` - -**Environment variable format:** - -```json -{ "http": { "port": 8080, "cors": true }, "logging": { "level": "info", "rotation": { "enabled": true } } } -``` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. 
If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization' - -A string representation of a comma separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests. - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. 
- -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. - -`requestQueueLimit` - _Type_: integer; _Default_: 20000 - -The maximum estimated request queue time, in milliseconds. When the queue is above this limit, requests will be rejected with a 503. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key. - -`http2` - _Type_: boolean; _Default_: false - -Enables HTTP/2 for the HTTP server. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. 
- -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for HTTP mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -**Note:** MQTT has its own `mqtt.network.mtls.user` setting (see [MQTT configuration](#mqtt)). - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming HTTP connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -**Note:** MQTT has its own `mqtt.network.mtls.required` setting (see [MQTT configuration](#mqtt)). Replication uses node-based authentication via certificates or IP addresses, with credential-based fallback (see [Securing Replication Connections](../developers/replication/#securing-connections)). - -`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled) - -When mTLS is enabled, Harper can verify the revocation status of client certificates using CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication. - -**Certificate verification is disabled by default** and must be explicitly enabled for production environments where certificate revocation checking is required. 
- -Set to `true` to enable with defaults, `false` to disable, or configure with an object: - -**Global Settings:** - -- `failureMode` - _Type_: string; _Default_: 'fail-closed' - Global behavior when verification fails: - - `'fail-open'`: Allow connection on verification failure (logs warning) - - `'fail-closed'`: Reject connection on verification failure (recommended) - -**CRL Configuration:** (enabled by default when certificateVerification is enabled) - -- `crl.enabled` - _Type_: boolean; _Default_: true - Enable/disable CRL checking -- `crl.timeout` - _Type_: number; _Default_: 10000 - Maximum milliseconds to wait for CRL download -- `crl.cacheTtl` - _Type_: number; _Default_: 86400000 - Milliseconds to cache CRL (24 hours) -- `crl.gracePeriod` - _Type_: number; _Default_: 86400000 - Grace period after CRL nextUpdate (24 hours) -- `crl.failureMode` - _Type_: string; _Default_: 'fail-closed' - CRL-specific failure mode - -**OCSP Configuration:** (enabled by default as fallback when certificateVerification is enabled) - -- `ocsp.enabled` - _Type_: boolean; _Default_: true - Enable/disable OCSP checking -- `ocsp.timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response -- `ocsp.cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache successful OCSP responses (1 hour) -- `ocsp.errorCacheTtl` - _Type_: number; _Default_: 300000 - Milliseconds to cache OCSP errors (5 minutes) -- `ocsp.failureMode` - _Type_: string; _Default_: 'fail-closed' - OCSP-specific failure mode - -**Verification Strategy:** -Harper uses a CRL-first strategy with OCSP fallback. When a client certificate is presented: - -1. Check CRL if available (fast, cached locally) -2. Fall back to OCSP if CRL is not available or fails -3. 
Apply the configured failure mode if both methods fail - -Example configurations: - -```yaml -# Basic mTLS without certificate verification (certificate revocation not checked) -http: - mtls: true -``` - -```yaml -# mTLS with certificate verification enabled (recommended for production) -http: - mtls: - certificateVerification: true # Uses all defaults (CRL + OCSP, fail-closed) -``` - -```yaml -# Require mTLS for all connections + certificate verification -http: - mtls: - required: true # Reject connections without valid client certificate - certificateVerification: true -``` - -```yaml -# mTLS with custom verification settings for high-security environments -http: - mtls: - certificateVerification: - failureMode: fail-closed # Global setting - crl: - timeout: 15000 # 15 seconds for CRL download - cacheTtl: 43200000 # Cache CRLs for 12 hours - gracePeriod: 86400000 # 24 hour grace period - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache results for 2 hours -``` - -```yaml -# mTLS with CRL only (no OCSP fallback) -http: - mtls: - certificateVerification: - ocsp: false # Disable OCSP, CRL remains enabled -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. 
If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. `debug.host` - Specify the host interface to listen on `debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - -`heapSnapshotNearLimit` - _Type_: boolean; - -```yaml -threads: - heapSnapshotNearLimit: true -``` - -This specifies that a heap snapshot should be taken when the heap limit is near the limit. - ---- - -### `replication` - -The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances. - -```yaml -replication: - hostname: server-one - url: wss://server-one:9925 - databases: '*' - routes: - - wss://server-two:9925 - port: null - securePort: 9933, - enableRootCAs: true -``` - -`hostname` - _Type_: string; - -The hostname of the current Harper instance. - -`url` - _Type_: string; - -The URL of the current Harper instance. - -`databases` - _Type_: string/array; _Default_: "\*" (all databases) - -Configure which databases to replicate. This can be a string for all database or an array for specific databases. The list can be a simple array of database names: - -```yaml -replication: - databases: - - system - - data - - mydb -``` - -The database list can also specify databases that are purely "sharded" databases. 
For databases that are marked as sharded, replication will _only_ create database subscription connections to nodes in the same shard. Sharding can still function without this setting, since the residency location for sharding can be determined for each table or each record. However, using this setting will reduce the overhead of connections in situations where all data is uniformly sharded, creating a simpler and more efficient replication topology. To mark databases as sharded, you can specify a list of databases with a `name` and `sharded` flag: - -```yaml -replication: - databases: - - name: system - - name: data - sharded: true -``` - -`routes` - _Type_: array; - -An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties. - -`startTime` - _Type_: string; ISO formatted UTC date string. - -Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property. - -`revokedCertificates` - _Type_: array; - -An array of serial numbers of revoked certificates. If a connection is attempted with a certificate that is in this list, the connection will be rejected. - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 - startTime: 2024-02-06T15:30:00Z - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -`port` - _Type_: integer; - -The port to use for replication connections. - -`securePort` - _Type_: integer; _Default_: 9933 - -The port to use for secure replication connections. - -`enableRootCAs` - _Type_: boolean; _Default_: true - -When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time. 
- -`mtls` - _Type_: object; - -Configures mTLS settings for replication connections. **mTLS is always required for replication** and cannot be disabled (for security reasons). You can configure certificate verification settings: - -```yaml -replication: - mtls: - certificateVerification: true # Enable certificate revocation checking -``` - -`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled) - -When enabled, Harper will verify the revocation status of replication peer certificates using CRL and/or OCSP. This follows the same configuration structure as [HTTP certificate verification](#http) documented above. - -**Important:** mTLS itself is always enabled for replication connections and cannot be disabled. This setting only controls whether certificate revocation checking (CRL/OCSP) is performed. - -Example configurations: - -```yaml -# Replication with mTLS but no certificate verification (default) -replication: - hostname: server-one - routes: - - server-two - # mTLS is always enabled, certificate verification is optional -``` - -```yaml -# Replication with certificate verification enabled (recommended for production) -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: true # Uses CRL and OCSP with defaults -``` - -```yaml -# Replication with custom certificate verification settings -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: - crl: - timeout: 15000 - cacheTtl: 43200000 - ocsp: - timeout: 8000 -``` - -Certificate verification can also be configured via environment variables: - -```bash -REPLICATION_MTLS_CERTIFICATEVERIFICATION=true -REPLICATION_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed -REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL=true -REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 -REPLICATION_MTLS_CERTIFICATEVERIFICATION_OCSP=true -``` - -`blobTimeout` - _Type_: number; _Default_: 120000 - -Amount of time to wait 
for a blob to be transferred before timing out, measured in milliseconds. - -`failOver` - _Type_: boolean; _Default_: true - -When true, Harper will attempt to fail-over to subscribing to a different node if the current node is unreachable, to reach consistency. - -`shard` - _Type_: integer; - -This defines the shard id of this instance and is used in conjunction with the [Table Resource functions](../developers/replication/sharding#custom-sharding) `setResidency` & `setResidencyById` to programmatically route traffic to the proper shard. - ---- - -### `clustering` using NATS - -The `clustering` section configures the NATS clustering engine, this is used to replicate data between instances of Harper. - -_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over Websockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._ - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the Harper mesh network and discovery service. 
- -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. 
- -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. 
- -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of Harper threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. 
When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special Harper user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enables or disables the local studio.
- -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root` or `logging.path`). Many of the logging configuration properties can be set and applied without a restart (are dynamically applied). - -In addition, structured logging of data changes is also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enables table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: warn - -Control the verbosity of text event logs. - -```yaml -logging: - level: warn -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `warn`. - -`console` - _Type_: boolean; _Default_: true - -Controls whether console.log and other console.\* calls (as well as other JS components that write to `process.stdout` and `process.stderr`) are logged to the log file.
By default, these are not logged to the log file, but this can be enabled: - -```yaml -logging: - console: true -``` - -`root` - _Type_: string; _Default_: \/log - -The directory path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`path` - _Type_: string; _Default_: \/log/hdb.log - -The file path where the log file will be written. - -```yaml -logging: - path: ~/hdb/log/hdb.log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable rotation, `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximate only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: true - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log Harper logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - -`auditAuthEvents` - -`logFailed` - _Type_: boolean; _Default_: false - -Log all failed authentication events.
- -_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -`logSuccessful` - _Type_: boolean; _Default_: false - -Log all successful authentication events. - -_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -```yaml -logging: - auditAuthEvents: - logFailed: false - logSuccessful: false -``` - -#### Defining Separate Logging Configurations - -Harper's logger supports defining multiple logging configurations for different components in the system. Each logging configuration can be assigned its own `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. All logging defaults to the configuration of the "main" logger as configured above, but when logging is configured for different loggers, they will use their own configuration. Separate loggers can be defined: - -`logging.external` - -The `logging.external` section can be used to define logging for all external components that use the [`logger` API](../reference/globals). For example: - -```yaml -logging: - external: - level: warn - path: ~/hdb/log/apps.log -``` - -`http.logging` - -This section defines log configuration for HTTP logging. By default, HTTP requests are not logged, but defining this section will enable HTTP logging. Note that there can be substantive overhead to logging all HTTP requests. In addition to the standard logging configuration, the `http.logging` section also allows the following configuration properties to be set: - -- `timing` - This will log timing information -- `headers` - This will log the headers in each request (which can be very verbose) -- `id` - This will assign a unique id to each request and log it in the entry for each request. 
This is assigned as the `request.requestId` property and can be used by other logging to track a request. - Note that the `level` will determine which HTTP requests are logged: -- `info` (or more verbose) - All HTTP requests -- `warn` - HTTP requests with a status code of 400 or above -- `error` - HTTP requests with a status code of 500 or above - -For example: - -```yaml -http: - logging: - timing: true - level: info - path: ~/hdb/log/http.log - ... rest of http config -``` - -`authentication.logging` - -This section defines log configuration for authentication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`mqtt.logging` - -This section defines log configuration for MQTT. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`replication.logging` - -This section defines log configuration for replication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`tls.logging` - -This section defines log configuration for TLS. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`storage.logging` - -This section defines log configuration for setting up and reading the database files. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`analytics.logging` - -This section defines log configuration for analytics.
This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in Harper. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 30d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the Harper Operations API.\ -All the `operationsApi` configuration is optional.
Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the Harper operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. 
- -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. 
- -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: string; _Default_: null - -Path to a compression dictionary file. - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. - -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting Harper, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/database` - -The `path` configuration sets where all database files should reside.
- -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - -`blobPaths` - _Type_: string; _Default_: `/blobs` - -The `blobPaths` configuration sets where all the blob files should reside. This can be an array of paths, and if there are multiple, the blobs will be distributed across the paths. - -```yaml -storage: - blobPaths: - - /users/harperdb/big-storage -``` - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - -`reclamation` - -The reclamation section provides configuration for the reclamation process, which is responsible for reclaiming space when free space is low. For example: - -```yaml -storage: - reclamation: - threshold: 0.4 # Start storage reclamation efforts when free space has reached 40% of the volume space (default) - interval: 1h # Reclamation will run every hour (default) - evictionFactor: 100000 # A factor used to determine how much aggressively to evict cached entries (default) -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). 
- -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. - -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. - -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. 
- -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. 
- -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority` - -This can define a specific path to use for the certificate authority. By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this. - -`certificateVerification` - _Type_: boolean | object; _Default_: true - -When mTLS is enabled, Harper verifies the revocation status of client certificates using OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication. - -Set to `false` to disable certificate verification, or configure with an object: - -- `timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response -- `cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache verification results (default: 1 hour) -- `failureMode` - _Type_: string; _Default_: 'fail-open' - Behavior when OCSP verification fails: - - `'fail-open'`: Allow connection on verification failure (logs warning) - - `'fail-closed'`: Reject connection on verification failure - -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. 
The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. - -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - -### `analytics` - -`analytics_aggregatePeriod` - _Type_: number; _Default_: 60 (seconds) - -This defines how often recorded metrics in the `system.hdb_raw_analytics` table are aggregated into the `system.hdb_analytics` table. The analytics operations in the operations API exclusively use the aggregated analytics. 
- -```yaml -analytics: - aggregatePeriod: 60 -``` - -`analytics_replicate` - _Type_: boolean; _Default_: false - -This defines whether or not the aggregated analytics data in `system.hdb_analytics` should be replicated to the rest of the cluster. - -```yaml -analytics: - replicate: true -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../reference/components/applications#adding-components-to-root) package. This could be a remote git repo, a local folder/file or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/docs/deployments/harper-cli.md b/docs/deployments/harper-cli.md deleted file mode 100644 index d447e892..00000000 --- a/docs/deployments/harper-cli.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Harper CLI ---- - -# Harper CLI - -## Harper CLI - -The Harper command line interface (CLI) is used to administer [self-installed Harper instances](install-harper/). - -### Installing Harper - -To install Harper with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. 
- -**Environment Variables** - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -**Command Line Arguments** - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -### Starting Harper - -To start Harper after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -### Stopping Harper - -To stop Harper once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -### Restarting Harper - -To restart Harper once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -### Getting the Harper Version - -To check the version of Harper that is installed run the following command: - -```bash -harperdb version -``` - ---- - -### Renew self-signed certificates - -To renew the Harper generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -### Copy a database with compaction - -To copy a Harper database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - ---- - -### Get all available CLI commands - -To display all available Harper CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -### Get the status of Harper and clustering - -To display the status of the Harper process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - ---- - -### Backups - -Harper uses a transactional commit process that ensures 
that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down), and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields an unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -## Operations API through the CLI - -Some of the API operations are available through the CLI; this includes most operations that do not require nested parameters. To call the operation use the following convention: `harperdb <operation> <parameter>=<value>`. By default, the result will be formatted as YAML; if you would like the result in JSON pass: `json=true`.
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` - -### Remote Operations - -The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example: - -```bash -export CLI_TARGET_USERNAME=HDB_ADMIN -export CLI_TARGET_PASSWORD=password -harperdb describe_database database=dev target=https://server.com:9925 -``` - -The same set of operations API are available for remote operations as well. - -#### Remote Component Deployment - -When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. 
This will package the current directory and send it to the target server (also `deploy` is allowed as an alias to `deploy_component`): - -```bash -harperdb deploy target=https://server.com:9925 -``` - -If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter): - -```bash -harperdb restart target=https://server.com:9925 replicated=true -``` diff --git a/docs/deployments/harper-cloud/alarms.md b/docs/deployments/harper-cloud/alarms.md deleted file mode 100644 index 372807e5..00000000 --- a/docs/deployments/harper-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | --------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram) | diff --git a/docs/deployments/harper-cloud/index.md b/docs/deployments/harper-cloud/index.md deleted file mode 100644 index c0785d0d..00000000 --- a/docs/deployments/harper-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Harper Cloud ---- - -# Harper Cloud - -[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper, it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new Harper Cloud instance in the Harper Studio. diff --git a/docs/deployments/harper-cloud/instance-size-hardware-specs.md b/docs/deployments/harper-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 6ea4c7d2..00000000 --- a/docs/deployments/harper-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/docs/deployments/harper-cloud/iops-impact.md b/docs/deployments/harper-cloud/iops-impact.md deleted file mode 100644 index 0b32df8e..00000000 --- a/docs/deployments/harper-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## Harper Cloud Storage - -Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all Harper Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html. - -## Estimating IOPS for Harper Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. 
So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact Harper Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. 
Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to Harper’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/docs/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/docs/deployments/harper-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index aae57f67..00000000 --- a/docs/deployments/harper-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your Harper instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -Harper on Verizon 5G Wavelength brings Harper closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from Harper to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -Harper 5G Wavelength Instance Specs While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. - -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. 
For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## Harper 5G Wavelength Storage - -Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). diff --git a/docs/deployments/install-harper/index.md b/docs/deployments/install-harper/index.md deleted file mode 100644 index a11c85d8..00000000 --- a/docs/deployments/install-harper/index.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Install Harper ---- - -# Install Harper - -## Install Harper - -This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can deploy it to [Harper Fabric](https://fabric.harper.fast) our distributed data application platform service. Harper is a cross-platform database; we recommend Linux for production use. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. Harper can also run on Windows and Mac, for development purposes only. 
Note: For Windows, we strongly recommend the use of Windows Subsystem for Linux (WSL). - -Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you already have it installed, you can skip ahead to installing Harper itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -Then log out and log back in, and install Node.js using nvm. We recommend using LTS, but we support all currently maintained Node versions (which is currently version 14 and newer; make sure to always use the latest minor/patch release for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start Harper - -Then you can install Harper with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -Harper will automatically start after installation. Harper's installation can be configured with numerous options via CLI arguments; for more information, visit the [Harper Command Line Interface](./harper-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux). - -## With Docker - -If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server.
You can then pull the image: - -```bash -docker pull harperdb/harperdb -``` - -Start a container, mount a volume and pass environment variables: - -```bash -docker run -d \ - -v <host-directory-path>:/home/harperdb/hdb \ - -e HDB_ADMIN_USERNAME=HDB_ADMIN \ - -e HDB_ADMIN_PASSWORD=password \ - -e THREADS=4 \ - -e OPERATIONSAPI_NETWORK_PORT=null \ - -e OPERATIONSAPI_NETWORK_SECUREPORT=9925 \ - -e HTTP_SECUREPORT=9926 \ - -p 9925:9925 \ - -p 9926:9926 \ - -p 9933:9933 \ - harperdb/harperdb -``` - -Here, the `<host-directory-path>` should be replaced with an actual directory path on your system where you want to store the persistent data. This command also exposes both the Harper Operations API (port 9925) and an additional HTTP port (9926). - -✅ Quick check: - -```bash -curl http://localhost:9925/health -``` - -:::info -💡 Why choose Docker: Great for consistent team environments, CI/CD pipelines, or deploying Harper alongside other services. -::: - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container. - -## Offline Install - -If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you'll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you've downloaded the .tgz file, run the following commands from the directory where you've placed it: - -```bash -npm install -g harperdb-X.X.X.tgz -harperdb install -```
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/docs/deployments/install-harper/linux.md b/docs/deployments/install-harper/linux.md deleted file mode 100644 index cc312bac..00000000 --- a/docs/deployments/install-harper/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to Harper with sudo privileges exists -1. An additional volume for storing Harper files is attached to the Linux instance -1. Traffic to ports 9925 (Harper Operations API) 9926 (Harper Application Interface) and 9932 (Harper Clustering) is permitted - -While you will need to access Harper through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. - ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. 
If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start Harper - -Here is an example of installing Harper with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing Harper with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start Harper on Boot - -Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=Harper - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration). 
diff --git a/docs/deployments/upgrade-hdb-instance.md b/docs/deployments/upgrade-hdb-instance.md deleted file mode 100644 index da1c885f..00000000 --- a/docs/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Upgrade a Harper Instance ---- - -# Upgrade a Harper Instance - -This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](harper-cloud/) will be upgraded by the Harper Cloud team. - -## Upgrading - -Upgrading Harper is a two-step process. First the latest version of Harper must be downloaded from npm, then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of Harper using `npm install -g harperdb`. - - Note `-g` should only be used if you installed Harper globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - Harper will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -Harper supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that Harper is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure Harper is not running: - -```bash -harperdb stop -``` - -Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install Harper globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start Harper - -```bash -harperdb start -``` - ---- - -## Upgrading Nats to Plexus 4.4 - -To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process. - -The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa. 
- -### Enabling Plexus - -To enable Plexus on a node that is already running NATS, you will need to update [two values](configuration) in the `harperdb-config.yaml` file: - -```yaml -replication: - url: wss://my-cluster-node-1:9925 - hostname: node-1 -``` - -`replication.url` – This should be set to the URL of the current Harper instance. - -`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance. - -### Upgrade Steps - -1. Set up the bridge node: - Choose one node to be the bridge node. - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.** - Stop the instance and perform the upgrade. - Start the instance. This node should now be running both Plexus and NATS. -1. Upgrade a node: - Choose a node that needs upgrading and enable Plexus by following the "Enable Plexus" steps. - Disable NATS by setting `clustering.enabled` to `false`. - Stop the instance and upgrade it. - Start the instance. - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node. _Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._ - -```json -{ - "operation": "add_node", - "hostname": "node-1", - "url": "wss://my-cluster-node-1:9925" -} -``` - -1. Repeat Step 2 on all remaining nodes that need to be upgraded. -1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance. - -Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes.
diff --git a/docs/developers/_category_.json b/docs/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/docs/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/docs/developers/applications/caching.md b/docs/developers/applications/caching.md deleted file mode 100644 index 27e6f4e2..00000000 --- a/docs/developers/applications/caching.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: Caching ---- - -# Caching - -Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). 
You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): _stale expiration_ — the point when a request for a record should trigger a request to origin (but might possibly return the current stale record, depending on policy); _must-revalidate expiration_ — the point when a request for a record must make a request to origin first and return the latest value from origin; and _eviction expiration_ — the point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). -- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction).
- -#### How `scanInterval` Determines the Eviction Cycle - -`scanInterval` determines fixed clock-aligned times when eviction runs, and these times are the same regardless of when the server started. Harper takes the `scanInterval` and divides the TTL (`expiration` + `eviction`) into evenly spaced “anchor times.” These anchors are calculated in the local timezone of the server. This allows Harper to “snap” the eviction schedule to predictable points on the clock, such as every 15 minutes or every 6 hours, based on the interval length. As a result: - -- The server’s startup time does not affect when eviction runs. -- Eviction timings are deterministic and timezone-aware. -- For any given configuration, the eviction schedule is the same across restarts and across servers in the same local timezone. - -#### Example: 1-Hour Expiration - -`expiration` = 1 hour with default `scanInterval` (15 minutes, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 00:15 -> 00:30 -> 00:45 -> 01:00 -> ... continuing every 15 minutes ... - -If the server starts at 12:05 it does not run eviction at 12:20 or “15 minutes after startup.” Instead, the next scheduled anchor is 12:15, then 12:30, 12:45, 13:00, etc. The schedule is clock-aligned, not startup-aligned. - -#### Example: 1-Day Expiration - -`expiration` = 1 day with default `scanInterval` (6 hours, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 06:00 -> 12:00 -> 18:00 -> ... continuing every 6 hours ... - -If the server starts at 12:05 the next matching eviction time is 18:00 the same day, then 00:00, 06:00, 12:00, 18:00, etc. If the server starts at 19:30 the schedule does not shift. Instead, the next anchor time is 00:00, and the regular 6-hour cycle continues. - -## Define External Data Source - -Next, you need to define the source for your cache. 
External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyCache } = tables; -MyCache.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. 
Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. 
This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d+)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). 
- -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - setInterval(() => { // every second retrieve more data - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - }, 1000); - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running on a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. 
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - async get() { - const post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json(); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. 
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` directive to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource cannot be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache. - - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this is overridden, the method will be called at this time. 
- -### Caching Flow with Write-Through - -When writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- Harper will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written to the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/docs/developers/applications/data-loader.md b/docs/developers/applications/data-loader.md deleted file mode 100644 index b4059207..00000000 --- a/docs/developers/applications/data-loader.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: Data Loader ---- - -# Data Loader - -The Data Loader is a built-in component that provides a reliable mechanism for loading data from JSON or YAML files into Harper tables as part of component deployment. This feature is particularly useful for ensuring specific records exist in your database when deploying components, such as seed data, configuration records, or initial application data. 
- -## Configuration - -To use the Data Loader, first specify your data files in the `config.yaml` in your component directory: - -```yaml -dataLoader: - files: 'data/*.json' -``` - -The Data Loader is an [Extension](../../reference/components#extensions) and supports the standard `files` configuration option. - -## Data File Format - -Data files can be structured as either JSON or YAML files containing the records you want to load. Each data file must specify records for a single table - if you need to load data into multiple tables, create separate data files for each table. - -### Basic Example - -Create a data file in your component's data directory (one table per file): - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com", - "role": "administrator" - }, - { - "id": 2, - "username": "user1", - "email": "user1@example.com", - "role": "standard" - } - ] -} -``` - -### Multiple Tables - -To load data into multiple tables, create separate data files for each table: - -**users.json:** - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com" - } - ] -} -``` - -**settings.yaml:** - -```yaml -database: myapp -table: settings -records: - - id: 1 - setting_name: app_name - setting_value: My Application - - id: 2 - setting_name: version - setting_value: '1.0.0' -``` - -## File Organization - -You can organize your data files in various ways: - -### Single File Pattern - -```yaml -dataLoader: - files: 'data/seed-data.json' -``` - -### Multiple Files Pattern - -```yaml -dataLoader: - files: - - 'data/users.json' - - 'data/settings.yaml' - - 'data/initial-products.json' -``` - -### Glob Pattern - -```yaml -dataLoader: - files: 'data/**/*.{json,yaml,yml}' -``` - -## Loading Behavior - -When Harper starts up with a component that includes the Data Loader: - -1. 
The Data Loader reads all specified data files (JSON or YAML) -1. For each file, it validates that a single table is specified -1. Records are inserted or updated based on content hash comparison: - - New records are inserted if they don't exist - - Existing records are updated only if the data file content has changed - - User modifications made via Operations API or other methods are preserved - those records won't be overwritten - - Users can add extra fields to data-loader records without blocking future updates to the original fields -1. The Data Loader uses SHA-256 content hashing stored in a system table (`hdb_dataloader_hash`) to track which records it has loaded and detect changes - -### Change Detection - -The Data Loader intelligently handles various scenarios: - -- **New records**: Inserted with their content hash stored -- **Unchanged records**: Skipped (no database writes) -- **Changed data file**: Records are updated using `patch` to preserve any extra fields users may have added -- **User-created records**: Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten -- **User-modified records**: Records modified after being loaded are preserved and not overwritten -- **User-added fields**: Extra fields added to data-loader records are preserved during updates - -This approach ensures data files can be safely reloaded across deployments and node scaling without losing user modifications. - -Note: While the Data Loader can create tables automatically by inferring the schema from the provided records, it's recommended to define your table schemas explicitly using the [graphqlSchema](../applications/defining-schemas) component for better control and type safety. - -## Best Practices - -1. 
**Define Schemas First**: While the Data Loader can infer schemas, it's strongly recommended to define your table schemas and relations explicitly using the [graphqlSchema](../applications/defining-schemas) component before loading data. This ensures proper data types, constraints, and relationships between tables. - -1. **One Table Per File**: Remember that each data file can only load records into a single table. Organize your files accordingly. - -1. **Idempotency**: Design your data files to be idempotent - they should be safe to load multiple times without creating duplicate or conflicting data. - -1. **Version Control**: Include your data files in version control to ensure consistency across deployments. - -1. **Environment-Specific Data**: Consider using different data files for different environments (development, staging, production). - -1. **Data Validation**: Ensure your data files are valid JSON or YAML and match your table schemas before deployment. - -1. **Sensitive Data**: Avoid including sensitive data like passwords or API keys directly in data files. Use environment variables or secure configuration management instead. 
- -## Example Component Structure - -``` -my-component/ -├── config.yaml -├── data/ -│ ├── users.json -│ ├── roles.json -│ └── settings.json -├── schemas.graphql -└── roles.yaml -``` - -With this structure, your `config.yaml` might look like: - -```yaml -# Load environment variables first -loadEnv: - files: '.env' - -# Define schemas -graphqlSchema: - files: 'schemas.graphql' - -# Define roles -roles: - files: 'roles.yaml' - -# Load initial data -dataLoader: - files: 'data/*.json' - -# Enable REST endpoints -rest: true -``` - -## Related Documentation - -- [Built-In Components](../../reference/components/built-in-extensions) -- [Extensions](../../reference/components/extensions) -- [Bulk Operations](../operations-api/bulk-operations) - For loading data via the Operations API diff --git a/docs/developers/applications/debugging.md b/docs/developers/applications/debugging.md deleted file mode 100644 index bd9d2622..00000000 --- a/docs/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. - -For local debugging and development, it is recommended that you use standard console log statements for logging. 
For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration). - -Harper Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. - -## Viewing the Log - -The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page. Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log. diff --git a/docs/developers/applications/define-routes.md b/docs/developers/applications/define-routes.md deleted file mode 100644 index d16c787e..00000000 --- a/docs/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. 
While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. - -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. 
The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which — as the name implies — bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to Harper — the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. - -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](define-routes#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. 
- -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. 
- -- logger.trace('Starting the handler for /dogs') -- logger.debug('This should only fire once') -- logger.warn('This should never ever fire') -- logger.error('This did not go well') -- logger.fatal('This did not go very well at all') diff --git a/docs/developers/applications/defining-roles.md b/docs/developers/applications/defining-roles.md deleted file mode 100644 index 365aa132..00000000 --- a/docs/developers/applications/defining-roles.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Defining Application Roles ---- - -# Defining Application Roles - -Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase. - -Let’s walk through creating a role, assigning it, and seeing it in action. - -## Step 1: Declare a Role - -First, point Harper to a roles configuration file. Add this to your `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -Then create a simple `roles.yaml` in your application directory. For example, here’s a role that can only read and insert data into the `Dog` table: - -```yaml -dog_reader: - super_user: false - data: - Dog: - read: true - insert: true -``` - -When Harper starts up, it will create this role (or update it if it already exists). - -## Step 2: Create a User for the Role - -Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run): - -```bash -curl -u admin:password -X POST http://localhost:9926 \ - -H "Content-Type: application/json" \ - -d '{ - "operation": "add_user", - "username": "alice", - "password": "password", - "role": "dog_reader" - }' -``` - -Now you have a user named `alice` with the `dog_reader` role. 
- -## Step 3: Make Requests as Different Users - -Authenticate requests as `alice` to see how her role works: - -```bash -# allowed (insert, role permits insert) -curl -u alice:password -X POST http://localhost:9926/Dog/ \ - -H "Content-Type: application/json" \ - -d '{"name": "Buddy", "breed": "Husky"}' - -# not allowed (delete, role does not permit delete) -curl -u alice:password -X DELETE http://localhost:9926/Dog/1 -``` - -The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`. - -Now compare with a super_user: - -```bash -# super_user can delete -curl -u admin:password -X DELETE http://localhost:9926/Dog/1 -``` - -This succeeds because the super_user role has full permissions. - -## Where to Go Next - -This page gave you the basics — declare a role, assign it, and see it work. - -For more advanced scenarios, including: - -- defining multiple databases per role, -- granting fine-grained attribute-level permissions, -- and the complete structure of `roles.yaml`, - -see the [Roles Reference](../../reference/roles). diff --git a/docs/developers/applications/defining-schemas.md b/docs/developers/applications/defining-schemas.md deleted file mode 100644 index 2a5e821e..00000000 --- a/docs/developers/applications/defining-schemas.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in Harper using GraphQL schema definitions. Schema definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. The [introduction to applications](./) provides a helpful introduction to how to use schemas as part of database application development. 
- -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allows data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties. For example, some Dog records could also optionally include a `favoriteTrick` property. - -In this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. -- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. 
- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes is recorded. This defaults to the [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -Database naming: the default "data" database is generally a good default choice for tables in applications that will not be reused in other applications (and don't need to worry about staying in a separate namespace). Applications with many tables may wish to organize the tables into separate databases (but remember that transactions do not preserve atomicity across different databases, only across tables in the same database). For components that are designed for re-use, it is recommended that you use a database name that is specific to the component (e.g. "my-component-data") to avoid name collisions with other components. - -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoint, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key. Using the `@relationship` directive will define a property as a computed property, which resolves to a record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. 
The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship. Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resources/) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... -} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. 
Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a reciprocal relationship, from the example above, adding a relationship from brand back to product. Here we continue to use the `brandId` attribute from the `Product` schema, and we define a relationship to the `Product` table through the `products` attribute: - -```graphql -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: brandId) -} -``` - -Once this is defined we can use the `products` attribute as a property in our brand instances and allow for querying by `products` and selecting product attributes as returned properties in query results. - -Note that schemas can also reference themselves with relationships, allowing records to define relationships like parent-child relationships between records in the same table. Also note that for a many-to-many relationship, you must not combine the `to` and `from` property in the same relationship directive. - -### Computed Properties: `@computed` - -The `@computed` directive specifies that a field is computed based on other fields in the record. This is useful for creating derived fields that are not stored in the database, but are computed when a specific record field is queried/accessed. The `@computed` directive must be used in combination with a field that is a function that computes the value of the field. For example: - -```graphql -type Product @table { - id: ID @primaryKey - price: Float - taxRate: Float - totalPrice: Float @computed(from: "price + (price * taxRate)") -} -``` - -The `from` argument specifies the expression that computes the value of the field. The expression can reference other fields in the record. The expression is evaluated when the record is queried or indexed. 
- -The `computed` directive may also be defined in a JavaScript module, which is useful for more complex computations. You can specify a computed attribute, and then define the function with the `setComputedAttribute` method. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed -} -``` - -```javascript -tables.Product.setComputedAttribute('totalPrice', (record) => { - return record.price + record.price * record.taxRate; -}); -``` - -Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed(version: 1) @indexed -} -``` - -If you were to update the `setComputedAttribute` function for the `totalPrice` attribute, to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing. - -Note that computed properties will not be included by default in a query result, you must explicitly include them in query results using the `select` query function. 
- -Another example of using a computed custom index, is that we could index all the comma-separated words in a `tags` property by doing (similar techniques are used for full-text indexing): - -```graphql -type Product @table { - id: ID @primaryKey - tags: String # comma delimited set of tags - tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags -} -``` - -For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32-bit, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, that you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys). - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. When an attribute is indexed, Harper will create secondary index from the data in this field for fast/efficient querying using this field. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). 
- -A standard index will index the values in each field, so you can query directly by those values. If the field's value is an array, each of the values in the array will be indexed (you can query by any individual value). - -#### Vector Indexing - -The `@indexed` directive can also specify a `type`. To use vector indexing, you can specify the `type` as `HNSW` for Hierarchical Navigable Small World indexing. This will create a vector index for the attribute. For example: - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW") -} -``` - -HNSW indexing finds the nearest neighbors to a search vector. To use this, you can query with a `sort` parameter, for example: - -```javascript -let results = Product.search({ - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -This can be used in combination with other conditions as well, for example: - -```javascript -let results = Product.search({ - conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }], - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -HNSW supports several additional arguments to the `@indexed` directive to adjust the HNSW parameters: - -- `distance` - Define the distance function. This can be set to 'euclidean' or 'cosine' (uses negative of cosine similarity). The default is cosine. -- `efConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors. A higher value can yield better recall, and a lower value can have better performance. If `efSearchConstruction` is set, this is only applied to indexing. The default is 100. -- `M` - The preferred number of connections at each layer in the HNSW graph. A higher number uses more space but can be helpful when the intrinsic dimensionality of the data is higher. A lower number can be more efficient. The default is 16. 
-- `optimizeRouting` - This uses a heuristic to avoid graph connections that match existing indirect connections (connections through another node). This can yield more efficient graph traversals for the same M setting. This is a number between 0 and 1 and a higher value will more aggressively omit connections with alternate paths. Setting this to 0 will disable route optimizing and follow the traditional HNSW algorithm for creating connections. The default is 0.5. -- `mL` - The normalization factor for level generation, by default this is computed from `M`. -- `efSearchConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors for searching. The default is 50. - -For example - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100) -} -``` - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). - -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself. - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity. 
- -### Field Types - -Harper supports the following field types in addition to user-defined (object) types: - -- `String`: String/text -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647) -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992) -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available) -- `BigInt`: Any integer (negative or positive) with less than 300 digits (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately) -- `Boolean`: true or false -- `ID`: A string (but indicates it is not intended to be human readable) -- `Any`: Any primitive, object, or array is allowed -- `Date`: A Date object -- `Bytes`: Binary data as a Buffer or Uint8Array -- `Blob`: Binary data as a [Blob](../../reference/blob), designed for large blocks of data that can be streamed. It is recommended that you use this for binary data that will typically be larger than 20KB. - -#### Renaming Tables - -It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. - -### OpenAPI Specification - -_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints is configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`. 
- -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/docs/developers/applications/index.md b/docs/developers/applications/index.md deleted file mode 100644 index 8ac979ab..00000000 --- a/docs/developers/applications/index.md +++ /dev/null @@ -1,244 +0,0 @@ ---- -title: Applications ---- - -# Applications - -Harper is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that Harper provides by building a Harper application, a fundamental building-block of the Harper ecosystem. - -When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference. - -Before we get started, let's clarify some terminology that is used throughout the documentation. - -**Components** are the high-level concept for any modules that extend the Harper core platform adding additional functionality. The application you will build here is a type of component. In addition to applications, components also encompass extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. 
- -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -Extensions can also depend on other extensions. For example, the [`@harperdb/apollo`](https://github.com/HarperDB/apollo) extension depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. Applications can then use the `@harperdb/apollo` extension to implement an Apollo GraphQL backend server. - -```mermaid -flowchart TD - subgraph Applications - direction TB - NextJSApp["Next.js App"] - ApolloApp["Apollo App"] - CustomResource["Custom Resource"] - end - - subgraph Extensions - direction TB - subgraph Custom - NextjsExt["@harperdb/nextjs"] - ApolloExt["@harperdb/apollo"] - end - subgraph Built-In - GraphqlSchema["graphqlSchema"] - JsResource["jsResource"] - Rest["rest"] - end - end - - subgraph Core - direction TB - Database["database"] - FileSystem["file-system"] - Networking["networking"] - end - - NextJSApp --> NextjsExt - ApolloApp --> ApolloExt - CustomResource --> JsResource & GraphqlSchema & Rest - - NextjsExt --> Networking - NextjsExt --> FileSystem - ApolloExt --> GraphqlSchema - ApolloExt --> Networking - - GraphqlSchema --> Database - JsResource --> Database - Rest --> Networking -``` - -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. 
Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](../../reference/components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -Beyond applications and extensions, components are further classified as built-in or custom. **Built-in** components are included with Harper by default and can be directly referenced by their name. The `graphqlSchema`, `rest`, and `jsResource` extensions used in the previous application example are all examples of built-in extensions. **Custom** components must use external references, generally npm or GitHub packages, and are often included as dependencies within the `package.json` of the component. - -> Harper maintains a number of custom components that are available on `npm` and `GitHub`, such as the [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) extension or the [`@harperdb/status-check`](https://github.com/HarperDB/status-check) application. - -Harper does not currently include any built-in applications, making "custom applications" a bit redundant. Generally, we just say "application". However, there is a multitude of both built-in and custom extensions, and so the documentation refers to them as such. A complete list of built-in extensions is available in the [Built-In Extensions](../../reference/components/built-in-extensions) documentation page, and the list of custom extensions and applications is available below. - -This guide is going to walk you through building a basic Harper application using a set of built-in extensions. 
- -> The Reference -> Components section of the documentation contains a [complete reference for all aspects of components](../../reference/components), applications, extensions, and more. - -## Custom Functionality with JavaScript - -[The getting started guide](/learn/) covers how to build an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. In Harper, data is accessed through our [Resource API](../../reference/resources/), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). Let's add a property to the dog records when they are returned, that includes their age in human years. 
To do this, we get the `Dog` class from the defined tables, extend it (with our custom logic), and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - static loadAsInstance = false; - async get(target) { - const record = await super.get(target); - return { - ...record, // include all properties from the record - humanAge: 15 + record.age * 5, // silly calculation of human age equivalent - }; - } -} -``` - -Here we exported the `DogWithHumanAge` class, which directly maps to the endpoint path (exported with the same name). Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. The `return super.get(target)` call at the end allows for any request target information (like query parameters) to be applied to the `get`, allowing metadata or queries to be passed through. - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -We use the new table's (static) `get()` method to retrieve a breed by id. Harper will maintain the current context, ensuring that we are accessing the data atomically, in a consistent snapshot across tables. This provides: - -1. Automatic tracking of most recently updated timestamps across resources for caching purposes -2. Sharing of contextual metadata (like user who requested the data) -3. 
Transactional atomicity for any writes (not needed in this get operation, but important for other operations) - -The resource methods are automatically wrapped with a transaction and will automatically commit the changes when the method finishes. This allows us to fully utilize multiple resources in our current transaction. With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -//resource.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - static loadAsInstance = false; - async get(target) { - // get the Dog record - const record = await super.get(target); - // get the Breed record - let breedDescription = await Breed.get(record.breed); - return { - ...record, - breedDescription, - }; - } -} -``` - -The call to `Breed.get` will return a record from the `Breed` table as specified by the provided id/primary key. Like the `Dog` record, we can directly use this object or copy properties. - -We may also want to customize access to this data. By default, the `target` has a `checkPermission` property that indicates that the table's `get` method will check if there is a valid user with access to a table before returning a record (and throw an `AccessViolation` if they do not). However, we can explicitly allow permission to the table's data/records by setting `checkPermission` to `false`: - -```javascript - async get(target) { - target.checkPermission = false; - const record = await super.get(target); - ... -``` - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. the POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. 
Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const record = this.update(target); - record.tricks.push(data.trick); // will be persisted when the transaction commits - } - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The `update` method returns an `Updatable` object, which automatically tracks changes you make to your record and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data on to the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -We can also define custom authorization capabilities here. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post()` method or `put()` method to do this. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const context = this.getContext(); - // if we want to skip the default permission checks, we can turn off checkPermissions: - target.checkPermission = false; // don't have update perform any permission check - const record = this.update(target); - // and do our own/custom permission check: - if (record.owner !== context.user?.username) { - throw new Error('Can not update this record'); - } - record.tricks.push(data.trick); - } - } -} -``` - -Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. 
If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.js -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to the root url `/` to be directly resolved to this resource. - -## Define Custom Data Sources - -We can also directly implement the `Resource` class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get(target) { - return (await fetch(`https://best-dog-site.com/${target}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources. - -Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../../reference/globals) and the [Resource interface](../../reference/resources). - -## Configuring Applications/Components - -For complete information on configuring applications, refer to the [Component Configuration](../../reference/components) reference page. - -## Define Fastify Routes - -Exporting a resource will generate full RESTful endpoints. 
But, you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the Harper's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/docs/developers/applications/web-applications.md b/docs/developers/applications/web-applications.md deleted file mode 100644 index 02fd1893..00000000 --- a/docs/developers/applications/web-applications.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Web Applications on Harper ---- - -# Web Applications on Harper - -Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed -specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used -with Harper to create web applications with standard best-practice design and development patterns. 
Running these frameworks -on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading, -caching, and distributed design. - -Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching -allows you to create full-featured web applications with a single platform. This eliminates the overhead of legacy solutions that -require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while -allowing the full stack to be deployed to distributed locations with full local response handling, providing an incredibly low latency web experience. - -## Web Application Frameworks - -With built-in caching mechanisms, and an easy-to-use JavaScript API for interacting with data, creating full-featured applications -using popular frameworks is a simple and straightforward process. - -Get started today with one of our examples: - -- [Next.js](https://github.com/HarperDB/nextjs-example) -- [React SSR](https://github.com/HarperDB/react-ssr-example) -- [Vue SSR](https://github.com/HarperDB/vue-ssr-example) -- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example) -- [Solid SSR](https://github.com/HarperDB/solid-ssr-example) - -## Cookie Support - -Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications -using best-practice security patterns, allowing users to login and maintain a session without any credential storage on the client side -that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. 
For example, this could be a login endpoint in your resources.js file: - -```javascript -export class Login extends Resource { - async post(data) { - const { username, password } = data; - await request.login(username, password); - return { message: 'Logged in!' }; - } -} -``` - -This endpoint can be called from the client side using a standard fetch request, a cookie will be returned, and the session will be maintained by Harper. -This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling. - -## Browser Caching Negotiation - -Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return `304 Not Modified` response based on prior `Etag` sent in headers. It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server. - -## Built-in Cross-Origin Resource Sharing (CORS) - -Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you. 
- -## More Resources - -Make sure to check out our developer videos too: - -- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY) -- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc) diff --git a/docs/developers/operations-api/advanced-json-sql-examples.md b/docs/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index c4254430..00000000 --- a/docs/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created Harper will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." -} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. 
- -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/docs/developers/operations-api/analytics.md b/docs/developers/operations-api/analytics.md deleted file mode 100644 index 470d4066..00000000 --- a/docs/developers/operations-api/analytics.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: Analytics Operations ---- - -# Analytics Operations - -## get_analytics - -Retrieves analytics data from the server. - -- `operation` _(required)_ - must always be `get_analytics` -- `metric` _(required)_ - any value returned by `list_metrics` -- `start_time` _(optional)_ - Unix timestamp in milliseconds -- `end_time` _(optional)_ - Unix timestamp in milliseconds -- `get_attributes` _(optional)_ - array of attribute names to retrieve -- `conditions` _(optional)_ - array of conditions to filter results (see [search_by_conditions docs](./nosql-operations) for details) - -### Body - -```json -{ - "operation": "get_analytics", - "metric": "resource-usage", - "start_time": 1769198332754, - "end_time": 1769198532754, - "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], - "conditions": [ - { - "attribute": "node", - "operator": "equals", - "value": "node1.example.com" - } - ] -} -``` - -### Response 200 - -```json -[ - { - "id": "12345", - "metric": "resource-usage", - "userCPUTime": 100, - "systemCPUTime": 50 - }, - { - "id": "67890", - "metric": "resource-usage", - "userCPUTime": 150, - "systemCPUTime": 75 - } -] -``` - -## list_metrics - -Returns a list of available metrics that can be queried. 
- -- `operation` _(required)_ - must always be `list_metrics` -- `metric_types` _(optional)_ - array of metric types to filter results; one or both of `custom` and `builtin`; default is `builtin` - -### Body - -```json -{ - "operation": "list_metrics", - "metric_types": ["custom", "builtin"] -} -``` - -### Response 200 - -```json -["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] -``` - -## describe_metric - -Provides detailed information about a specific metric, including its structure and available parameters. - -- `operation` _(required)_ - must always be `describe_metric` -- `metric` _(required)_ - name of the metric to describe - -### Body - -```json -{ - "operation": "describe_metric", - "metric": "resource-usage" -} -``` - -### Response 200 - -```json -{ - "attributes": [ - { - "name": "id", - "type": "number" - }, - { - "name": "metric", - "type": "string" - }, - { - "name": "userCPUTime", - "type": "number" - }, - { - "name": "systemCPUTime", - "type": "number" - }, - { - "name": "node", - "type": "string" - } - ] -} -``` diff --git a/docs/developers/operations-api/bulk-operations.md b/docs/developers/operations-api/bulk-operations.md deleted file mode 100644 index b6714552..00000000 --- a/docs/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,255 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into Harper - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running Harper - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` diff --git a/docs/developers/operations-api/certificate-management.md b/docs/developers/operations-api/certificate-management.md deleted file mode 100644 index f8eea402..00000000 --- a/docs/developers/operations-api/certificate-management.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Add Certificate - -Adds or updates a certificate in the `hdb_certificate` system table. -If a `private_key` is provided it will **not** be stored in `hdb_certificate`, it will be written to file in `/keys/`. -If a `private_key` is not passed the operation will search for one that matches the certificate. If one is not found an error will be returned. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_certificate` -- `name` _(required)_ - a unique name for the certificate -- `certificate` _(required)_ - a PEM formatted certificate string -- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority -- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for -- `private_key` _(optional)_ - a PEM formatted private key string - -### Body - -```json -{ - "operation": "add_certificate", - "name": "my-cert", - "certificate": "-----BEGIN CERTIFICATE-----ZDFAay... -----END CERTIFICATE-----", - "is_authority": false, - "private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... 
-----END RSA PRIVATE KEY-----" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added certificate: my-cert" -} -``` - ---- - -## Remove Certificate - -Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_certificate` -- `name` _(required)_ - the name of the certificate - -### Body - -```json -{ - "operation": "remove_certificate", - "name": "my-cert" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed my-cert" -} -``` - ---- - -## List Certificates - -Lists all certificates in the `hdb_certificate` system table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_certificates` - -### Body - -```json -{ - "operation": "list_certificates" -} -``` - -### Response: 200 - -```json -[ - { - "name": "HarperDB-Certificate-Authority-node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... S34==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": true, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "serial_number": "5235345", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - }, - { - "name": "node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 
5bv==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": false, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", - "serial_number": "5243646", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - } -] -``` diff --git a/docs/developers/operations-api/clustering-nats.md b/docs/developers/operations-api/clustering-nats.md deleted file mode 100644 index 0ba3af74..00000000 --- a/docs/developers/operations-api/clustering-nats.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering using NATS ---- - -# Clustering using NATS - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table -- - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'node1'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `node_name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about [Harper clustering here](../../reference/clustering). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/docs/developers/operations-api/clustering.md b/docs/developers/operations-api/clustering.md deleted file mode 100644 index b040e323..00000000 --- a/docs/developers/operations-api/clustering.md +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -The following operations are available for configuring and managing [Harper replication](../replication/). 
- -_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](clustering-nats) _**documentation.**_ - -## Add Node - -Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided a fully replicating system will be created. [Learn more about adding nodes here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add -- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. This will allow the Harper default self-signed certificates to be accepted. Defaults to `true` -- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials -- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used every time a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate based authorization is much more secure and safe, and avoids the need for storing credentials. Defaults to `false`. -- `revoked_certificates` _(optional)_ - an array of revoked certificate serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(optional)_ - The relationship created between nodes. 
If not provided a fully replicated cluster will be setup. Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate - - `table` - the table to replicate - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'server-two' to cluster" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance in the cluster. - -_Operation is restricted to super_user roles only_ - -_Note: will attempt to add the node if it does not exist_ - -- `operation` _(required)_ - must always be `update_node` -- `hostname` _(required)_ - the `hostname` of the remote node you are updating -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(required)_ - The relationship created between nodes. 
Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'server-two'" -} -``` - ---- - -## Remove Node - -Removes a Harper node from the cluster and stops replication. [Learn more about remove node here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `hostname` _(required)_ - The hostname of the node you are removing - -### Body - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'server-two' from cluster" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. - -`database_sockets` shows the actual websocket connections that exist between nodes. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "type": "cluster-status", - "connections": [ - { - "replicateByDefault": true, - "replicates": true, - "url": "wss://server-2.domain.com:9933", - "name": "server-2.domain.com", - "subscriptions": null, - "database_sockets": [ - { - "database": "data", - "connected": true, - "latency": 0.7, - "thread_id": 1, - "nodes": ["server-2.domain.com"], - "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", - "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", - "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", - "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" - } - ] - } - ], - "node_name": "server-1.domain.com", - "is_enabled": true -} -``` - -There is a separate socket for each database for each node. Each node is represented in the connections array, and each database connection to that node is represented in the `database_sockets` array. Additional timing statistics include: - -- `lastCommitConfirmed`: When a commit is sent out, it should receive a confirmation from the remote server; this is the last receipt of confirmation of an outgoing commit. -- `lastReceivedRemoteTime`: This is the timestamp of the transaction that was last received. The timestamp is from when the original transaction occurred. -- `lastReceivedLocalTime`: This is the local time when the last transaction was received. If there is a difference between this and `lastReceivedRemoteTime`, it means there is a delay from the original transaction to receiving it and so it is probably catching-up/behind. -- `sendingMessage`: The timestamp of the transaction that is actively being sent. This won't exist if the replicator is waiting for the next transaction to send. - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. 
Resets and replaces any existing clustering setup. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object following the `add_node` schema. - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password2" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "subscribe": true, - "publish": false - } - ] - }, - { - "hostname": "server-three", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password3" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Cluster Set Routes - -Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties. - -### Body - -```json -{ - "operation": "cluster_set_routes", - "routes": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets the replication routes from the Harper config file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -[ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } -] -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` diff --git a/docs/developers/operations-api/components.md b/docs/developers/operations-api/components.md deleted file mode 100644 index 36d61986..00000000 --- a/docs/developers/operations-api/components.md +++ /dev/null @@ -1,553 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a specified template (defaults to the [application template](https://github.com/HarperFast/application-template)). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create -- `template` _(optional)_ - the URL of a git repository to use as a template. Must be a string. Defaults to `https://github.com/HarperFast/application-template` -- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. Defaults to `npm install`. 
Depending on the host environment, you can use this to switch to using an alternative package manager. -- `install_timeout` _(optional)_ - The maximum time, in milliseconds, to wait for the install command to complete. Must be a number. Defaults to `300000` (5 minutes) -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -If deploying with the `payload` option, Harper will decode the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory. - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registered packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. 
In addition to tags, you can also reference branches or commit numbers. Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have installed SSH keys on the server. Ensure the `host` portion of the url exactly matches the `host` used when adding ssh keys to ensure proper authentication. - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. Must be a string -- `force` _(optional)_ - if true, allows deploying over protected core components. Must be a boolean. Defaults to `false`. 
Core system components (like `graphql`, `http`, `authentication`, etc.) are protected to prevent accidentally breaking HarperDB. User-defined components can be redeployed without this flag. -- `restart` _(optional)_ - must be either a boolean or the string `rolling`. If set to `rolling`, a rolling restart will be triggered after the component is deployed, meaning that each node in the cluster will be sequentially restarted (waiting for the last restart to start the next). If set to `true`, the restart will not be rolling, all nodes will be restarted in parallel. If `replicated` is `true`, the restart operations will be replicated across the cluster. -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. -- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. Defaults to `npm install`. Depending on the host environment, you can use this to switch to using an alternative package manager. -- `install_timeout` _(optional)_ - The maximum time, in milliseconds, to wait for the install command to complete. Must be a number. Defaults to `300000` (5 minutes) - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template", - "replicated": true -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. - -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete -- `replicated` _(optional)_ - if true, Harper will replicate the component deletion to all nodes in the cluster. Must be a boolean. -- `restart` _(optional)_ - if true, Harper will restart after dropping the component. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. 
Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. Defaults to `utf8` -- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` - ---- - -## Add SSH Key - -Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_ssh_key` -- `name` _(required)_ - the name of the key -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key. -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` -- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "add_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nfake\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Added ssh key: harperdb-private-component" -} -``` - -### Generated Config and Deploy Component "package" string examples - -``` -#harperdb-private-component -Host harperdb-private-component.github.com - HostName github.com - User git - IdentityFile /hdbroot/ssh/harperdb-private-component.key - IdentitiesOnly yes -``` - -``` -"package": "git+ssh://git@:.git#semver:v1.2.3" - -"package": "git+ssh://git@harperdb-private-component.github.com:HarperDB/harperdb-private-component.git#semver:v1.2.3" -``` - -Note that `deploy_component` with a package uses `npm install` so the url must be a valid npm format url. The above is an example of a url using a tag in the repo to install. 
- ---- - -## Update SSH Key - -Updates the private key contents of an existing SSH key. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_ssh_key` -- `name` _(required)_ - the name of the key to be updated -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key. -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` -- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "update_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Updated ssh key: harperdb-private-component" -} -``` - -## Delete SSH Key - -Deletes a SSH key. This will also remove it from the generated SSH config. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_ssh_key` -- `name` _(required)_ - the name of the key to be deleted -- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "name": "harperdb-private-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Deleted ssh key: harperdb-private-component" -} -``` - ---- - -## List SSH Keys - -List off the names of added SSH keys - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_ssh_keys` - -### Body - -```json -{ - "operation": "list_ssh_keys" -} -``` - -### Response: 200 - -```json -[ - { - "name": "harperdb-private-component" - } -] -``` - -_Note: Additional SSH keys would appear as more objects in this array_ - ---- - -## Set SSH Known Hosts - -Sets the SSH known_hosts file. This will overwrite the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_ssh_known_hosts` -- `known_hosts` _(required)_ - The contents to set the known_hosts to. Line breaks must be delimite d with -- `replicated` _(optional)_ - if true, Harper will replicate the known hosts to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "set_ssh_known_hosts", - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Known hosts successfully set" -} -``` - -## Get SSH Known Hosts - -Gets the contents of the known_hosts file - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_ssh_known_hosts` - -### Body - -```json -{ - "operation": "get_ssh_known_hosts" -} -``` - -### Response: 200 - -```json -{ - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - ---- - -## Install Node Modules - -:::warning Deprecated -This operation is deprecated, as it is handled automatically by [deploy_component](#deploy-component) and [restart](./system-operations#restart). -::: - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must ba an array of custom functions projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` diff --git a/docs/developers/operations-api/configuration.md b/docs/developers/operations-api/configuration.md deleted file mode 100644 index 9872da4f..00000000 --- a/docs/developers/operations-api/configuration.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -## Set Configuration - -Modifies the Harper configuration file parameters. Must follow with a [restart](./system-operations#restart) or [restart_service](./system-operations#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the Harper configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "replication": { - "hostname": "node1", - "databases": "*", - "routes": null, - "url": "wss://127.0.0.1:9925" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - 
"requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` diff --git a/docs/developers/operations-api/custom-functions.md b/docs/developers/operations-api/custom-functions.md deleted file mode 100644 index 2c469bf4..00000000 --- a/docs/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,281 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -:::warning Deprecated -These operations are deprecated. -::: - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDStudio uses this call to render the file content in its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. 
- -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string represenation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/docs/developers/operations-api/databases-and-tables.md b/docs/developers/operations-api/databases-and-tables.md deleted file mode 100644 index 936425c3..00000000 --- a/docs/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts above 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). 
- -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. - -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. 
The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. 
The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/docs/developers/operations-api/index.md b/docs/developers/operations-api/index.md deleted file mode 100644 index ad44d9de..00000000 --- a/docs/developers/operations-api/index.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../deployments/configuration#operationsapi), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](./security#basic-auth) or [JWT authentication](./security#jwt-auth). 
For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Clustering with NATS](operations-api/clustering-nats) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [System Operations](operations-api/system-operations) -- [Configuration](operations-api/configuration) -- [Certificate Management](operations-api/certificate-management) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) -- [Analytics](operations-api/analytics) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/docs/developers/operations-api/jobs.md b/docs/developers/operations-api/jobs.md deleted file mode 100644 index cf71fa00..00000000 --- a/docs/developers/operations-api/jobs.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - 
-Returns job status, metrics, and messages for the specified job ID. - -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/docs/developers/operations-api/logs.md 
b/docs/developers/operations-api/logs.md deleted file mode 100644 index 2c2ba194..00000000 --- a/docs/developers/operations-api/logs.md +++ /dev/null @@ -1,733 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read Harper Log - -Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. 
By default, will maintain `hdb.log` order -- `filter` _(optional)_ - a query string that must be a substring of each log line returned - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - 
"__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 
1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 
1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/docs/developers/operations-api/nosql-operations.md b/docs/developers/operations-api/nosql-operations.md deleted file mode 100644 index db07e0da..00000000 --- a/docs/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,389 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_attribute` - deprecated in favor of `attribute` -- `value` _(required)_ - value you wish to search - wild cards are allowed -- `search_value` - deprecated in favor of `value` -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "attribute": "owner_name", - "value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. 
The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_attribute` - deprecated in favor of `attribute` - - `comparator` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_type` - deprecated in favor of `comparator` - - `value` _(required)_ - case-sensitive value you wish to search. If the `comparator` is `between` then use an array of two values to search between (both inclusive) - - `search_value` - deprecated in favor of `value` - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "attribute": "age", - "comparator": "between", - "value": [5, 8] - }, - { - "attribute": "weight_lbs", - "comparator": "greater_than", - "value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "attribute": "adorable", - "comparator": "equals", - "value": true - }, - { - "attribute": "lovable", - "comparator": "equals", - "value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/docs/developers/operations-api/quickstart-examples.md b/docs/developers/operations-api/quickstart-examples.md deleted file mode 100644 index a6c8f637..00000000 --- 
a/docs/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using on the Harper Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/docs/developers/operations-api/registration.md b/docs/developers/operations-api/registration.md deleted file mode 100644 index d5d278c5..00000000 --- 
a/docs/developers/operations-api/registration.md +++ /dev/null @@ -1,231 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the Harper instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Install Usage License - -Install a Harper license for a block of usage. Multiple usage blocks may be installed, and they will be used up sequentially, with the earliest installed blocks used first. A license is installed -by creating a string that consists of three base64url encoded blocks, separated by dots. The three blocks consist of: - -- `header`: This is a JSON object with two properties: - - `typ`: should be "Harper-License" - - `alg`: should be "EdDSA" - -This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the first base64url block. 
- -- license payload: This is a JSON object with properties: - - `id` _(required)_ - A unique id for the license - - `level` _(required)_ - Usage level number - - `region` _(required)_ - The region id where this license can be used - - `reads` _(required)_ - The number of allowed reads - - `readBytes` _(required)_ - The number of allowed read bytes - - `writes` _(required)_ - The number of allowed writes - - `writeBytes` _(required)_ - The number of allowed write bytes - - `realTimeMessages` _(required)_ - The number of allowed real-time messages - - `realTimeBytes` _(required)_ - The number of allowed real-time message bytes - - `cpuTime` _(optional)_ - The allowed amount of CPU time consumed by application code - - `storage` _(optional)_ - Maximum of storage that may be used - - `expiration` _(required)_ - The date when this block expires, as an ISO date - -This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the second base64url block. - -For example: - -```json -{ - "id": "license-717b-4c6c-b69d-b29014054ab7", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "readBytes": 8000000000000, - "writes": 500000000, - "writeBytes": 1000000000000, - "realTimeMessages": 10000000000, - "realTimeBytes": 40000000000000, - "cpuTime": 108000, - "storage": 400000000000000, - "expiration": "2025-07-25T21:17:21.248Z" -} -``` - -- `signature`: This is the cryptographic signature, signed by Harper, of the first two blocks, separated by a dot, `header.payload`. This is also converted to base64url. - -The three base64url blocks are combined to form the `license` property value in the operation. 
- -- `operation` _(required)_ - must always be `install_usage_license` -- `license` _(required)_ - This is the combination of the three blocks in the form `header.payload.signature` - -### Body - -```json -{ - "operation": "install_usage_license", - "license": "abc...0123.abc...0123.abc...0123" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully installed usage license" -} -``` - ---- - -## Get Usage Licenses - -This will retrieve and return _all_ usage licenses (including expired, exhausted, and licenses in any other state), with counts of how much of the limits have been consumed. - -- `operation` _(required)_ - must always be `get_usage_licenses` -- `region` _(optional)_ - will filter by region when supplied - -### Body - -```json -{ - "operation": "get_usage_licenses" -} -``` - -### Response: 200 - -```json -[ - { - "id": "license-717b-4c6c-b69d-b29014054ab7", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "usedReads": 1100000000, - "readBytes": 8000000000000, - "usedReadBytes": 3000000000000, - "writes": 500000000, - "usedWrites": 300000000, - "writeBytes": 1000000000000, - "usedWriteBytes": 4300000000000, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 2000000000, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 13000000000000, - "cpuTime": 108000, - "usedCpuTime": 41000, - "storage": 400000000000000, - "expiration": "2025-07-25T21:17:21.248Z" - }, - { - "id": "license-4c6c-b69d-b29014054ab7-717b", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "usedReads": 0, - "readBytes": 8000000000000, - "usedReadBytes": 0, - "writes": 500000000, - "usedWrites": 0, - "writeBytes": 1000000000000, - "usedWriteBytes": 0, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 0, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 0, - "cpuTime": 108000, - "usedCpuTime": 0, - "storage": 400000000000000, - "expiration": "2025-09-25T21:17:21.248Z" - }, - { - "id": 
"license-4c6c-b69d-b29014054ab7-717b", - "level": 2, - "region": "us-se-2", - "reads": 2000000000, - "usedReads": 0, - "readBytes": 8000000000000, - "usedReadBytes": 0, - "writes": 500000000, - "usedWrites": 0, - "writeBytes": 1000000000000, - "usedWriteBytes": 0, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 0, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 0, - "cpuTime": 108000, - "usedCpuTime": 0, - "storage": 400000000000000, - "expiration": "2025-11-25T21:17:21.248Z" - } -] -``` - ---- - -## Get Fingerprint - -(Deprecated) -Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -(Deprecated) -Sets the Harper license as generated by Harper License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/docs/developers/operations-api/sql-operations.md b/docs/developers/operations-api/sql-operations.md deleted file mode 100644 index 4b7076bb..00000000 --- a/docs/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. The SELECT statement is used to query data from the database. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/docs/developers/operations-api/system-operations.md b/docs/developers/operations-api/system-operations.md deleted file mode 100644 index d39e93cb..00000000 --- a/docs/developers/operations-api/system-operations.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: System Operations ---- - -# System Operations - -## Restart - -Restarts the Harper instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified Harper service. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` -- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Set Status - -Sets a status value that can be used for application-specific status tracking. Status values are stored in memory and are not persisted across restarts. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_status` -- `id` _(required)_ - the key identifier for the status -- `status` _(required)_ - the status value to set (string between 1-512 characters) - -### Body - -```json -{ - "operation": "set_status", - "id": "primary", - "status": "active" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -### Notes - -- The `id` parameter must be one of the allowed status types: 'primary', 'maintenance', or 'availability' -- If no `id` is specified, it defaults to 'primary' -- For 'availability' status, only 'Available' or 'Unavailable' values are accepted -- For other status types, any string value is accepted - ---- - -## Get Status - -Retrieves a status value previously set with the set_status operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_status` -- `id` _(optional)_ - the key identifier for the status to retrieve (defaults to all statuses if not provided) - -### Body - -```json -{ - "operation": "get_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -If no id parameter is provided, all status values will be returned: - -```json -[ - { - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 - }, - { - "id": "maintenance", - "status": "scheduled", - "__createdtime__": 1621364600123, - "__updatedtime__": 1621364600123 - } -] -``` - ---- - -## Clear Status - -Removes a status entry by its ID. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `clear_status` -- `id` _(required)_ - the key identifier for the status to remove - -### Body - -```json -{ - "operation": "clear_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "message": "Status successfully cleared" -} -``` diff --git a/docs/developers/operations-api/token-authentication.md b/docs/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/docs/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. 
- -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. - -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqk
wsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/docs/developers/operations-api/users-and-roles.md b/docs/developers/operations-api/users-and-roles.md deleted file mode 100644 index 91f222b9..00000000 --- a/docs/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": 
"super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, 
- "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. - -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. 
- -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. Harper will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your Harper instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/docs/developers/real-time.md b/docs/developers/real-time.md deleted file mode 100644 index 9c5c79e4..00000000 --- a/docs/developers/real-time.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -Harper is a database, not a generic broker, and therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. 
One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -Harper supports MQTT with its `mqtt` server module and Harper supports MQTT over standard TCP sockets or over WebSockets. This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). - -#### Capabilities - -Harper's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). 
MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In Harper topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". - -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -Harper supports QoS 0 and 1 for publishing and subscribing. - -Harper supports multi-level topics, both for subscribing and publishing. 
Harper also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -#### Events - -JavaScript components can also listen for MQTT events. This is available on the server.mqtt.events object. For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do: - -```javascript -server.mqtt.events.on('connected', (session, socket) => { - console.log('client connected with id', session.clientId); -}); -``` - -The following MQTT events are available: - -- `connection` - When a client initially establishes a TCP or WS connection to the server -- `connected` - When a client establishes an authorized MQTT connection -- `auth-failed` - When a client fails to authenticate -- `disconnected` - When a client disconnects from the server - -### Ordering - -Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. Harper is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, Harper does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. 
Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). - -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and will be the retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as being the latest and retained message (#2 in this case). - -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of as "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important.
- -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. 
The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | :heavy_check_mark: | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties retain handling | :heavy_check_mark: | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/docs/developers/replication/index.md b/docs/developers/replication/index.md deleted file mode 100644 index 703f00f3..00000000 --- a/docs/developers/replication/index.md +++ /dev/null @@ 
-1,300 +0,0 @@ ---- -title: Replication/Clustering ---- - -# Replication/Clustering - -Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops. - -### Replication Overview - -Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe. - -### Replication Configuration - -To connect your nodes, you need to provide hostnames or URLs for the nodes to connect to each other. This can be done via configuration or through operations. To configure replication, you can specify connection information the `replication` section of the [harperdb-config.yaml](../deployments/configuration). Here, you can specify the host name of the current node, and routes to connect to other nodes, for example: - -```yaml -replication: - hostname: server-one - routes: - - server-two - - server-three -``` - -In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. 
Routes to other nodes can also be configured with URLs or ports: - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9933 # URL based route - - hostname: server-three # define a hostname and port - port: 9933 -``` - -You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form, you would also need to provide the necessary credentials for the operation, see the section on securing connections for more details): - -```json -{ - "operation": "add_node", - "hostname": "server-two" -} -``` - -These operations will also dynamically generate certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node. - -Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally). - -By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases: - -```yaml -replication: - databases: - - data - - system -``` - -By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables.
However, you can also configure replication for individual tables, and disable and exclude replication for specific tables in a database by setting `replicate` to `false` in the table definition: - -```graphql -type LocalTableForNode @table(replicate: false) { - id: ID! - name: String! -} -``` - -You can also control which nodes data is replicated to, and how many nodes data is replicated to. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated to with the [sharding configuration and APIs](replication/sharding). - -By default, replication connects to the secure port 9933. You can configure the replication port in the `replication` section. - -```yaml -replication: - securePort: 9933 -``` - -### Securing Connections - -Harper supports the highest levels of security through public key infrastructure based security and authorization. Replication connections use WebSocket protocol and support multiple authentication methods depending on your security configuration: - -- **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs) -- **IP-based authentication** (for development/testing): Nodes are identified by their IP address when using insecure connections (see [Insecure Connection IP-based Authentication](#insecure-connection-ip-based-authentication) below) - -When using certificate-based authentication, Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to check if certificates have been revoked. This ensures that compromised certificates cannot be used for replication connections. 
OCSP and CRL verification works automatically with certificates from public certificate authorities (like Let's Encrypt or DigiCert) when `enableRootCAs` is enabled, as these certificates include the necessary OCSP responder URLs and CRL distribution points. For self-signed certificates or private CAs that don't support OCSP/CRL, you can use Harper's manual certificate revocation feature (see [Revoking Certificates](#revoking-certificates) below). Certificate verification settings follow the same configuration as HTTP mTLS connections (see [certificate verification configuration](../../deployments/configuration#http)). - -#### Provide your own certificates - -If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or Digicert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config: - -``` -replication: - enableRootCAs: true -``` - -And then just make sure the certificate’s common name (CN) matches the node's hostname. - -#### Setting Up Custom Certificates - -There are two ways to configure Harper with your own certificates: - -1. Use the `add_certificate` operation to upload them. -1. Or, specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file. - -If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key.
If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.\ -\ -Example configuration: - -```yaml -tls: - certificate: /path/to/certificate.pem - certificateAuthority: /path/to/ca.pem - privateKey: /path/to/privateKey.pem -``` - -With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes. - -You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release. - -To enable the root certificates set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file: - -```yaml -replication: - enableRootCAs: true -``` - -#### Cross-generated certificates - -Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing. 
\ -\ -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection. - -If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process: - -- It creates a certificate signing request (CSR), sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA). -- The signed certificate is stored for future connections between the nodes, ensuring secure communication. - -**Important:** Your credentials are not stored—they are discarded immediately after use. - -You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates. - -Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer. - -#### Revoking Certificates - -Certificates used in replication can be revoked by using the certificate serial number and either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config in `harperdb-config.yaml`. - -To utilize the `revoked_certificates` attribute in the `hdb_nodes` table, you can use the `add_node` or `update_node` operation to add the certificate serial number to the `revoked_certificates` array. For example: - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "revoked_certificates": ["1769F7D6A"] -} -``` - -To utilize the replication route config in `harperdb-config.yaml`, you can add the certificate serial number to the `revokedCertificates` array. 
For example: - -```yaml -replication: - routes: - - hostname: server-three - port: 9930 - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -#### Removing Nodes - -Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. For example: - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -#### Insecure Connection IP-based Authentication - -You can completely disable secure connections and use IP addresses to authenticate nodes with each other. This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication within an insecure port, either by [configuring the operations API](../deployments/configuration) to run on an insecure port or replication to run on an insecure port. And then set up IP-based routes to connect to other nodes: - -```yaml -replication: - port: 9933 - routes: - - 127.0.0.2 - - 127.0.0.3 -``` - -Note that in this example, we are using loop back addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development. - -### Controlled Replication Flow - -By default, Harper will replicate all data in all databases, with symmetric bi-directional flow between nodes. However, there are times when you may want to control the replication flow between nodes, and dictate that data should only be replicated in one direction between certain nodes. This can be done by setting the direction in the `replicates` attribute of the node definition when adding the node or configuring the replication route. 
For example, to configure a node to only send data to `node-two` (which only receives), and only receive data from `node-three` (which only sends) you can add the following to the replication route: - -```yaml -replication: - databases: - - data - routes: - - host: node-two - replicates: - sends: false - receives: true - - host: node-three - replicates: - sends: true - receives: false -``` - -When using controlled flow replication, you will typically have different route configurations for each node to every other node. In that case, typically you do want to ensure that you are _not_ replicating the `system` database, since the `system` database contains the node configurations, and replicating the `system` database will cause all nodes to be replicated and have identical route configurations. - -#### Explicit Subscriptions - -By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. However, you can manage these connections manually by explicitly subscribing to nodes. This should _not_ be used for production replication and should be avoided and exists only for testing, debugging, and legacy migration. This will likely be removed in V5. If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure. If you want single direction replication, you can use controlled replication flow described above. - -#### How to Subscribe to Nodes - -To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions.
For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node. - -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": false - } - ] -} -``` - -To update an explicit subscription you can use the [`update_node` operation](./operations-api/clustering). - -Here we are updating the subscription to receive transactions on the `dev.my-table` table from the `server-two` node. - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ] -} -``` - -#### Monitoring Replication - -You can monitor the status of replication through the operations API. You can use the [`cluster_status` operation](./operations-api/clustering) to get the status of replication. For example: - -```json -{ - "operation": "cluster_status" -} -``` - -#### Database Initial Synchronization and Resynchronization - -When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data. - -You may also specify a `start_time` in the `add_node` to specify that when a database connects, that it should not download the entire database, but only data since a given starting time. 
This can be used to facilitate horizontal scalability of storage and write performance
With a sharding configuration (or customization below) in place, requests for records that don't reside on the server handling the request will automatically be forwarded to the appropriate node.
For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: 2;confirm=1 - -... -``` - -You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: node1,node2 -``` - -(This can also be used with the `confirm` parameter.) - -## Replication Control with Operations - -Likewise, you can specify replicateTo and confirm parameters in the operation object when using the Harper API. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following operation object: - -```json -{ - "operation": "update", - "schema": "dev", - "table": "MyTable", - "hashValues": [3], - "record": { - "name": "John Doe" - }, - "replicateTo": 2, - "replicatedConfirmation": 1 -} -``` - -or you can specify nodes: - -```jsonc -{ - // ... - "replicateTo": ["node-1", "node-2"], - // ... -} -``` - -## Programmatic Replication Control - -Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. For example, you can define a put method: - -```javascript -class MyTable extends tables.MyTable { - put(record) { - const context = this.getContext(); - context.replicateTo = 2; // or an array of node names - context.replicatedConfirmation = 1; - return super.put(record); - } -} -``` - -## Configuration for Static Sharding - -Alternatively, you can configure static sharding, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key. 
The `shard` is identified by a number. To configure the shard for each node, you can specify the shard number in the `replication`'s `shard` in the configuration: - -```yaml -replication: - shard: 1 -``` - -Alternatively, you can configure the `shard` under the `replication` `routes`. This allows you to assign a specific shard id based on the routing configuration. - -```yaml -replication: - routes: - - hostname: node1 - shard: 1 - - hostname: node2 - shard: 2 -``` - -Or you can specify a `shard` number by including that property in an `add_node` operation or `set_node` operation, to dynamically assign a node to a shard. - -You can then specify shard number in the `setResidency` or `setResidencyById` functions below. - -## Custom Sharding - -You can also define a custom sharding strategy by specifying a function to compute the "residency" or location of where records should be stored and reside. To do this we use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the `id` field, you can use the following code: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? ['node1'] : ['node2']; -}); -``` - -With this approach, the record metadata, which includes the residency information, and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function. - -The `setResidency` function can alternately return a shard number, which will replicate the data to all the nodes in that shard: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 
you can also disable cross-node access so that data is only returned if it is stored on the node where it is accessed.
To do this, you can set the `replicateFrom` property on the context of operation to `false`: - -```json -{ - "operation": "search_by_id", - "table": "MyTable", - "ids": [3], - "replicateFrom": false -} -``` - -Or use a header with the REST API: - -```http -GET /MyTable/3 -X-Replicate-From: none -``` diff --git a/docs/developers/rest.md b/docs/developers/rest.md deleted file mode 100644 index 7e085d8e..00000000 --- a/docs/developers/rest.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: REST ---- - -# REST - -## REST - -Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](applications/) and [defining schemas](applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). -- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. 
The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -### GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -#### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -##### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -#### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -#### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. Note that this will only work for properties that are declared in the schema. 
For tables that are exposed as endpoints, this also can be used to create new records.
Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -### Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. 
Chaining can be used to combine `gt` or `ge` with `lt` or `le` to specify a range of values.
For example, to return any product with a rating of `5` _or_ a `featured` attribute that is `true`, we could write:
If multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results.
allowing related attributes to be referenced in conditions and selected for returned results
Note that both of these queries are effectively acting as an "INNER JOIN".
For the FIQL comparators, which includes `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, `=le=`, the parser will perform type conversion, according to the following rules:
-- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion. - -#### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. 
- -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -#### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", data: "BEGIN:VCALENDAR\nVERSION:2.0\n... -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. 
This also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/docs/developers/security/basic-auth.md b/docs/developers/security/basic-auth.md deleted file mode 100644 index 22361432..00000000 --- a/docs/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -Harper uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -**You do not need to log in separately. Basic Auth is added to each HTTP request like create_database, create_table, insert etc… via headers.** - -A header is added to each HTTP request. The header key is `Authorization` the header value is `Basic <>`. - -## Authentication in Harper Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for Harper. - -_Note: This function uses btoa. 
Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/docs/developers/security/certificate-management.md b/docs/developers/security/certificate-management.md deleted file mode 100644 index 5fc6cb2c..00000000 --- a/docs/developers/security/certificate-management.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for Harper external facing APIs. For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. 
- -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -By default Harper will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable Harper HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set Harper will default to the values in the `tls` section. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### mTLS - -Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section: - -```yaml - -http: - mtls: true - ... 
-tls: - certificateAuthority: ~/hdb/keys/ca.pem - ... -``` - -### Certificate Revocation Checking - -When using mTLS, you may also want to enable certificate revocation checking to ensure that revoked certificates cannot be used for authentication, even if they're still within their validity period. Harper supports two industry-standard methods for checking certificate revocation status: - -**CRL (Certificate Revocation List)** - -- A digitally signed list of revoked certificates published by the Certificate Authority -- Downloaded and cached locally for fast verification -- Updated periodically (typically daily) -- Best for: High-volume verification, offline scenarios, predictable bandwidth usage - -**OCSP (Online Certificate Status Protocol)** - -- Real-time query to check individual certificate status -- Provides immediate revocation status -- Requires network connection for each check (with caching) -- Best for: Real-time revocation status, certificates without CRL distribution points - -**Harper's Approach: CRL-First with OCSP Fallback** - -Harper uses a CRL-first strategy for optimal performance: - -1. Checks CRL if available (fast, cached locally for 24 hours by default) -2. Falls back to OCSP if CRL is not available or fails (cached for 1 hour by default) -3. Applies the configured failure mode if both methods fail - -This strategy provides the best balance of performance, reliability, and security. 
- -**Enabling Certificate Verification** - -Certificate revocation checking is disabled by default and must be explicitly enabled: - -```yaml -http: - mtls: - required: true - certificateVerification: true # Enable with defaults -``` - -For production environments with high-security requirements, you can customize the verification settings: - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Reject connections on verification failure - crl: - timeout: 15000 # 15 seconds to download CRL - cacheTtl: 43200000 # Cache for 12 hours - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache for 2 hours -``` - -**Performance Considerations** - -- **CRL caching**: CRLs are cached locally, so subsequent verifications are very fast (no network requests) -- **OCSP caching**: Successful OCSP responses are cached (1 hour by default), errors cached for 5 minutes -- **Background refresh**: CRLs are refreshed in the background before expiration to avoid blocking requests -- **Graceful degradation**: Network failures don't block connections in fail-open mode - -**When to Use Certificate Verification** - -Enable certificate revocation checking when: - -- You need to immediately revoke access for compromised certificates -- Compliance or security policies require revocation checking -- You're in a zero-trust security environment -- Client certificates have long validity periods - -You may skip it if: - -- All certificates have very short validity periods (e.g., < 24 hours) -- You have alternative revocation mechanisms in place -- Performance is critical and risk is acceptable - -For detailed configuration options, see the [configuration reference](../../deployments/configuration#http). - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. 
- -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/docs/developers/security/certificate-verification.md b/docs/developers/security/certificate-verification.md deleted file mode 100644 index dd7360ec..00000000 --- a/docs/developers/security/certificate-verification.md +++ /dev/null @@ -1,502 +0,0 @@ ---- -title: Certificate Verification ---- - -# Certificate Verification - -Certificate verification (also called certificate revocation checking) is a security feature that ensures revoked certificates cannot be used for authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date due to compromise, employee departure, or other security concerns. 
- -## Overview - -When a client presents a certificate for mTLS authentication, Harper performs the following checks: - -1. **Certificate Validation** (always performed by Node.js TLS): - - Certificate signature is valid - - Certificate is issued by a trusted CA - - Certificate is within its validity period - - Certificate chain is properly formed - -2. **Certificate Revocation Checking** (optional, must be explicitly enabled): - - Certificate has not been revoked by the issuing CA - - Uses CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol) - -## Revocation Checking Methods - -Harper supports two industry-standard methods for checking certificate revocation status: - -### CRL (Certificate Revocation List) - -A CRL is a digitally signed list of revoked certificates published by a Certificate Authority. - -**Advantages:** - -- Fast verification (cached locally) -- Works offline once downloaded -- Predictable bandwidth usage -- Good for high-volume verification -- No privacy concerns (no per-certificate queries) - -**How it works:** - -1. Harper downloads the CRL from the distribution point specified in the certificate -2. CRL is cached locally (24 hours by default) -3. Subsequent verifications check the cached CRL (very fast, no network requests) -4. CRL is refreshed in the background before expiration - -**Configuration:** - -```yaml -http: - mtls: - certificateVerification: - crl: - timeout: 10000 # 10 seconds to download CRL - cacheTtl: 86400000 # Cache for 24 hours - gracePeriod: 86400000 # 24 hour grace period after nextUpdate - failureMode: fail-closed # Reject on CRL check failure -``` - -### OCSP (Online Certificate Status Protocol) - -OCSP provides real-time certificate status checking by querying the CA's OCSP responder. - -**Advantages:** - -- Real-time revocation status -- Smaller response size than CRL -- Good for certificates without CRL distribution points -- Works when CRL is unavailable - -**How it works:** - -1. 
Harper sends a request to the OCSP responder specified in the certificate -2. OCSP responder returns the current status (good, revoked, or unknown) -3. Response is cached (1 hour by default for success, 5 minutes for errors) - -**Configuration:** - -```yaml -http: - mtls: - certificateVerification: - ocsp: - timeout: 5000 # 5 seconds for OCSP response - cacheTtl: 3600000 # Cache successful responses for 1 hour - errorCacheTtl: 300000 # Cache errors for 5 minutes - failureMode: fail-closed # Reject on OCSP check failure -``` - -## Verification Strategy - -Harper uses a **CRL-first strategy with OCSP fallback** for optimal performance and reliability: - -1. **Check CRL** if available - - Fast (uses cached CRL) - - No network request needed if CRL is cached - - If CRL check succeeds or fails definitively, return result - -2. **Fall back to OCSP** if: - - Certificate has no CRL distribution point - - CRL download fails - - CRL is expired and cannot be refreshed - -3. **Apply failure mode** if both methods fail - -This strategy provides the best balance of: - -- **Performance**: CRL checks are very fast when cached -- **Reliability**: OCSP provides fallback when CRL is unavailable -- **Security**: Always attempts verification before falling back - -## Configuration - -### Enable with Defaults - -The simplest configuration enables certificate verification with sensible defaults: - -```yaml -http: - mtls: - required: true - certificateVerification: true -``` - -This enables: - -- CRL checking (enabled, 10s timeout, 24h cache) -- OCSP checking (enabled, 5s timeout, 1h cache) -- Fail-closed mode (rejects connections on verification failure) - -### Custom Configuration - -For production environments, you may want to customize settings: - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Global setting - crl: - timeout: 15000 # 15 seconds for CRL download - cacheTtl: 43200000 # Cache CRLs for 12 hours - gracePeriod: 86400000 # 24 
hour grace period - failureMode: fail-closed # CRL-specific setting - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache results for 2 hours - errorCacheTtl: 600000 # Cache errors for 10 minutes - failureMode: fail-closed # OCSP-specific setting -``` - -### CRL Only (No OCSP) - -For environments where OCSP is not available or desired: - -```yaml -http: - mtls: - certificateVerification: - ocsp: false # Disable OCSP, CRL remains enabled -``` - -### OCSP Only (No CRL) - -For environments preferring real-time checking: - -```yaml -http: - mtls: - certificateVerification: - crl: false # Disable CRL, OCSP remains enabled -``` - -### Environment Variables - -All settings can be configured via environment variables: - -```bash -# Enable certificate verification -HTTP_MTLS_CERTIFICATEVERIFICATION=true - -# Global failure mode -HTTP_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed - -# CRL settings -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL=true -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_CACHETTL=43200000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_GRACEPERIOD=86400000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_FAILUREMODE=fail-closed - -# OCSP settings -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP=true -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_TIMEOUT=8000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_CACHETTL=7200000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_ERRORCACHETTL=600000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_FAILUREMODE=fail-closed -``` - -For replication servers, use the `REPLICATION_` prefix instead of `HTTP_`. - -## Failure Modes - -Certificate verification supports two failure modes that control behavior when verification cannot be completed: - -### fail-closed (Recommended) - -**Default behavior.** Rejects connections when verification fails due to network errors, timeouts, or other operational issues. 
- -**Use when:** - -- Security is paramount -- You can tolerate false positives (rejecting valid certificates) -- Your CA infrastructure is highly available -- You're in a zero-trust environment - -**Example:** - -```yaml -certificateVerification: - failureMode: fail-closed -``` - -### fail-open - -Allows connections when verification fails, but logs a warning. The connection is still rejected if the certificate is explicitly found to be revoked. - -**Use when:** - -- Availability is more important than perfect security -- Your CA infrastructure may be intermittently unavailable -- You have other compensating controls -- You're gradually rolling out certificate verification - -**Example:** - -```yaml -certificateVerification: - failureMode: fail-open -``` - -**Important:** Invalid signatures on CRLs always result in rejection regardless of failure mode, as this indicates potential tampering. - -## Performance Considerations - -### CRL Performance - -- **First verification**: Downloads CRL (10s timeout by default) -- **Subsequent verifications**: Instant (reads from cache) -- **Background refresh**: CRL is refreshed before expiration without blocking requests -- **Memory usage**: ~10-100KB per CRL depending on size -- **Network usage**: One download per CRL per cacheTtl period - -### OCSP Performance - -- **First verification**: OCSP query (5s timeout by default) -- **Subsequent verifications**: Reads from cache (1 hour default) -- **Memory usage**: Minimal (~1KB per cached response) -- **Network usage**: One query per unique certificate per cacheTtl period - -### Optimization Tips - -1. **Increase CRL cache TTL** for stable environments: - - ```yaml - crl: - cacheTtl: 172800000 # 48 hours - ``` - -2. **Increase OCSP cache TTL** for long-lived connections: - - ```yaml - ocsp: - cacheTtl: 7200000 # 2 hours - ``` - -3. 
**Use CRL only** if you control the CA and **all certificates have CRL distribution points**: - - ```yaml - ocsp: false # Only disable if all certs have CRL URLs - ``` - -4. **Reduce grace period** if you need tighter revocation enforcement: - ```yaml - crl: - gracePeriod: 0 # No grace period - ``` - -## Production Best Practices - -### High-Security Environments - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Always reject on failure - crl: - timeout: 15000 # Longer timeout for reliability - cacheTtl: 43200000 # 12 hours (balance security and performance) - gracePeriod: 0 # No grace period for strict enforcement - ocsp: - timeout: 8000 - cacheTtl: 3600000 # 1 hour -``` - -### High-Availability Environments - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-open # Prioritize availability - crl: - timeout: 5000 # Shorter timeout to fail faster - cacheTtl: 86400000 # 24 hours - gracePeriod: 86400000 # 24 hour grace period - ocsp: - timeout: 3000 - cacheTtl: 7200000 # 2 hours for fewer queries -``` - -### Performance-Critical Environments - -For maximum performance, increase cache durations to minimize network requests: - -```yaml -http: - mtls: - required: true - certificateVerification: - crl: - cacheTtl: 172800000 # 48 hours (minimize CRL downloads) - gracePeriod: 86400000 # 24 hour grace period - ocsp: - cacheTtl: 7200000 # 2 hours (minimize OCSP queries) - errorCacheTtl: 600000 # Cache errors for 10 minutes -``` - -**Note**: Only disable OCSP (`ocsp: false`) if you're certain all client certificates have CRL distribution points. Otherwise, certificates without CRLs won't be checked for revocation. - -## Troubleshooting - -### Connection Rejected: Certificate Verification Failed - -**Cause:** Certificate was found to be revoked or verification failed in fail-closed mode. - -**Solutions:** - -1. Check if certificate is actually revoked in the CRL or OCSP responder -2. 
Verify CA infrastructure is accessible -3. Check timeout settings (may need to increase) -4. Temporarily use fail-open mode while investigating: - ```yaml - certificateVerification: - failureMode: fail-open - ``` - -### High Latency on First Connection - -**Cause:** CRL is being downloaded for the first time. - -**Solutions:** - -1. This is normal and only happens once per CRL per cacheTtl period -2. Subsequent connections will be fast (cached CRL) -3. Increase CRL timeout if downloads are slow: - ```yaml - crl: - timeout: 20000 # 20 seconds - ``` - -### Frequent CRL Downloads - -**Cause:** CRL cacheTtl is too short or CRL nextUpdate period is very short. - -**Solutions:** - -1. Increase cacheTtl: - ```yaml - crl: - cacheTtl: 172800000 # 48 hours - ``` -2. Increase gracePeriod to allow using slightly expired CRLs: - ```yaml - crl: - gracePeriod: 172800000 # 48 hours - ``` - -### OCSP Responder Unavailable - -**Cause:** OCSP responder is down or unreachable. - -**Solutions:** - -1. CRL will be used as fallback automatically -2. Use fail-open mode to allow connections: - ```yaml - ocsp: - failureMode: fail-open - ``` -3. Disable OCSP and rely on CRL only (ensure all certs have CRL URLs): - ```yaml - ocsp: false - ``` - -### Network/Firewall Blocking Outbound Requests - -**Cause:** Secure hosting environments often restrict outbound HTTP/HTTPS traffic to reduce exfiltration risks. This prevents Harper from reaching CRL distribution points and OCSP responders. - -**Symptoms:** - -- Certificate verification timeouts in fail-closed mode -- Logs show connection failures to CRL/OCSP URLs -- First connection succeeds (no cached CRL), subsequent fail after cache expires - -**Solutions:** - -1. 
**Allow outbound traffic to CA infrastructure** (recommended): - - Whitelist CRL distribution point URLs (from your certificates) - - Whitelist OCSP responder URLs (from your certificates) - - Example: If using Let's Encrypt, allow `http://x1.c.lencr.org/` and `http://ocsp.int-x3.letsencrypt.org/` - -2. **Use fail-open mode** (allows connections when verification fails): - - ```yaml - certificateVerification: - failureMode: fail-open # Don't block on network issues - ``` - -3. **Use CRL only with local caching/proxy**: - - Set up an internal CRL mirror/proxy - - Configure firewall to allow Harper → internal CRL proxy - - Increase cache TTL to reduce fetch frequency: - ```yaml - certificateVerification: - crl: - cacheTtl: 172800000 # 48 hours - ocsp: false # Disable OCSP - ``` - -4. **Disable verification** (if you have alternative security controls): - ```yaml - certificateVerification: false - ``` - -## Security Considerations - -### When Certificate Verification is Critical - -Enable certificate verification when: - -- Certificates have long validity periods (> 1 day) -- You need immediate revocation capability -- Compliance requires revocation checking (PCI DSS, HIPAA, etc.) -- You're in a zero-trust security model -- Client certificates are used for API authentication - -### When You Might Skip It - -Consider not using certificate verification when: - -- Certificates have very short validity periods (< 24 hours) -- You rotate certificates automatically (e.g., with cert-manager) -- You have alternative revocation mechanisms -- Performance is critical and risk is acceptable -- Your CA doesn't publish CRLs or support OCSP - -### Defense in Depth - -Certificate verification is one layer of security. 
Also consider: - -- Short certificate validity periods (reduces window of compromise) -- Certificate pinning (prevents CA compromise) -- Network segmentation (limits blast radius) -- Access logging and monitoring -- Regular certificate rotation - -## Replication Server - -Certificate verification works identically for replication servers. Use the `replication.mtls` configuration: - -```yaml -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: true -``` - -**Important:** mTLS is always required for replication and cannot be disabled. This configuration only controls whether certificate revocation checking is performed. - -For complete replication configuration, see [Configuration - Replication](../../deployments/configuration#replication). - -## Further Reading - -- [Certificate Management](./certificate-management) - Managing certificates and CAs -- [mTLS Authentication](./mtls-auth) - Setting up mTLS -- [Configuration Reference](../../deployments/configuration) - Complete configuration options diff --git a/docs/developers/security/configuration.md b/docs/developers/security/configuration.md deleted file mode 100644 index 2dee9d86..00000000 --- a/docs/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper. - -## CORS - -Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. 
In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -Harper provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. 
Use operation `harperdb restart` from Harper Operations API.** diff --git a/docs/developers/security/index.md b/docs/developers/security/index.md deleted file mode 100644 index a090aa88..00000000 --- a/docs/developers/security/index.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Security ---- - -# Security - -Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they're supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -## Authentication - -- [JWT Authentication](security/jwt-auth) - Token-based authentication using JSON Web Tokens -- [Basic Authentication](security/basic-auth) - Username and password authentication -- [mTLS Authentication](security/mtls-auth) - Mutual TLS certificate-based authentication - -## Certificate Management - -- [Certificate Management](security/certificate-management) - Managing certificates and Certificate Authorities -- [Certificate Verification](security/certificate-verification) - Certificate revocation checking (CRL/OCSP) - -## Access Control - -- [Configuration](security/configuration) - Security configuration and settings -- [Users and Roles](security/users-and-roles) - Role-based access control and permissions diff --git a/docs/developers/security/jwt-auth.md b/docs/developers/security/jwt-auth.md deleted file mode 100644 index 832373e4..00000000 --- a/docs/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -Harper uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. 
These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. - -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. 
In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/docs/developers/security/mtls-auth.md b/docs/developers/security/mtls-auth.md deleted file mode 100644 index f757f60d..00000000 --- a/docs/developers/security/mtls-auth.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -Harper supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. - -## Certificate Revocation Checking - -When using mTLS authentication, you can optionally enable certificate revocation checking to ensure that revoked certificates cannot be used, even if they are otherwise valid and trusted. This adds an important security layer by checking whether certificates have been explicitly revoked by the issuing Certificate Authority. - -Harper supports both CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) for checking certificate revocation status, using a CRL-first strategy with OCSP fallback for optimal performance and reliability. - -**To enable certificate verification:** - -```yaml -http: - mtls: - required: true - certificateVerification: true # Enable revocation checking -``` - -Certificate revocation checking is **disabled by default** and must be explicitly enabled. For detailed information about certificate revocation checking, including configuration options, performance considerations, and best practices, see [Certificate Management - Certificate Revocation Checking](./certificate-management#certificate-revocation-checking). 
diff --git a/docs/developers/security/users-and-roles.md b/docs/developers/security/users-and-roles.md deleted file mode 100644 index cff17e5a..00000000 --- a/docs/developers/security/users-and-roles.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in Harper - -Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within Harper. See full breakdown of operations restricted to only super_user roles [here](users-and-roles#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a Harper instance, this can be considered the admin role. 
- - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. - -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. 
- -Example JSON for `add_role` request - -```json -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true - } - ] - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [] - } - } - } - } -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles, blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). 
-
-```jsonc
-{
-	"table_name": { // the name of the table to define CRUD perms for
-		"read": boolean, // access to read from this table
-		"insert": boolean, // access to insert data to table
-		"update": boolean, // access to update data in table
-		"delete": boolean, // access to delete row data in table
-		"attribute_permissions": [ // permissions for specific table attributes
-			{
-				"attribute_name": "attribute_name", // attribute to assign permissions to
-				"read": boolean, // access to read this attribute from table
-				"insert": boolean, // access to insert this attribute into the table
-				"update": boolean // access to update this attribute in the table
-			}
-		]
-	}
-}
-```
-
-**Important Notes About Table Permissions**
-
-1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables.
-1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error.
-
-**Important Notes About Attribute Permissions**
-
-1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array.
-1. If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON.
-   - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._
-
-1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2).
-   - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to read, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._
-
-1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions.
-   - _See table_name2’s permission set for an example of this._
-
-1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored.
-1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full.
-   - If a role needs the ability to delete rows from a table, that permission should be set on the table-level.
-   - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement.
-
-## Role-Based Operation Restrictions
-
-The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles.
- -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | 
:-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed Harper as `<>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. User_b may not have access to the hdb files Harper needs. This also keeps Harper more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. 
diff --git a/docs/index.mdx b/docs/index.mdx deleted file mode 100644 index b25b1d96..00000000 --- a/docs/index.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Harper Docs ---- - -import CustomDocCardList from '@site/src/components/CustomDocCardList'; - -# Harper Docs - -:::info - -### Get the Most Out of Harper - -Join our Discord to access expert support, collaborate with Harper's core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. - -Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. - -## Getting Started - -The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. - -## Building with Harper - - diff --git a/docs/reference/_category_.json b/docs/reference/_category_.json deleted file mode 100644 index 1a36ae90..00000000 --- a/docs/reference/_category_.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "link": { - "type": "doc", - "id": "reference/index" - } -} diff --git a/docs/reference/analytics.md b/docs/reference/analytics.md deleted file mode 100644 index 742a299d..00000000 --- a/docs/reference/analytics.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -Harper provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. 
- -Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the Harper analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. 
The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -# Standard Analytics Metrics - -While applications can define their own metrics, Harper provides a set of standard metrics that are tracked for all services: - -## HTTP - -The following metrics are tracked for all HTTP requests: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ------------------ | ------------- | -------------- | ---------------------------------------------- | ------------ | ------------------------------------------------------- | -| `duration` | resource path | request method | `cache-hit` or `cache-miss` if a caching table | milliseconds | Duration of request handler | -| `duration` | route path | request method | fastify-route | milliseconds | | -| `duration` | operation | | operation | milliseconds | | -| `success` | resource path | request method | | % | | -| `success` | route path | request method | fastify-route | % | | -| `success` | operation | | operation | % | | -| `bytes-sent` | resource path | request method | | bytes | | -| `bytes-sent` | route path | request method | fastify-route | bytes | | -| `bytes-sent` | operation | | operation | bytes | | -| `transfer` | 
resource path | request method | operation | milliseconds | duration of transfer | -| `transfer` | route path | request method | fastify-route | milliseconds | duration of transfer | -| `transfer` | operation | | operation | milliseconds | duration of transfer | -| `socket-routed` | | | | % | percentage of sockets that could be immediately routed | -| `tls-handshake` | | | | milliseconds | | -| `tls-reused` | | | | % | percentage of TLS that reuses sessions | -| `cache-hit` | table name | | | % | The percentage of cache hits | -| `cache-resolution` | table name | | | milliseconds | The duration of resolving requests for uncached entries | - -The following are metrics for real-time MQTT connections: -| `metric` | `path` | `method` | `type` | Unit | Description | -|---|---|---|---|---|---| -| `mqtt-connections` | | | | count | The number of open direct MQTT connections | -| `ws-connections` | | | | count | number of open WS connections| -| `connection` | `mqtt` | `connect` | | % | percentage of successful direct MQTT connections | -| `connection` | `mqtt` | `disconnect` | | % | percentage of explicit direct MQTT disconnects | -| `connection` | `ws` | `connect` | | % | percentage of successful WS connections | -| `connection` | `ws` | `disconnect` | | % | percentage of explicit WS disconnects | -| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | The number of bytes sent for a given command and topic | - -The following are metrics for replication: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------------------------- | -| `bytes-sent` | node.database | `replication` | `egress` | bytes | The number of bytes sent for replication | -| `bytes-sent` | node.database | `replication` | `blob` | bytes | The number of bytes sent for replication of blobs | -| `bytes-received` | node.database | `replication` | `ingress` | bytes | The number of bytes 
received for replication | -| `bytes-received` | node.database | `replication` | `blob` | bytes | The number of bytes received for replication of blobs | - -The following are general resource usage statistics that are tracked: - -| `metric` | primary attribute(s) | other attribute(s) | Unit | Description | -| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------- | -| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | The size of the database in bytes | -| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage; including idle time, active time, task queue latency, RSS, heap, buffer and external memory usage | -| `resource-usage` | | | various | [See breakout below](#resource-usage) | -| `storage-volume` | `available`, `free`, `size` | `database` | bytes | The size of the storage volume in bytes | -| `table-size` | `size` | `database`, `table` | bytes | The size of the table in bytes | -| `utilization` | | | % | How much of the time the worker was processing requests | - - -`resource-usage` metrics are everything returned by [node:process.resourceUsage()](https://nodejs.org/api/process.html#processresourceusage)[^1] plus the following additional metrics: - -| `metric` | Unit | Description | -| ---------------- | ---- | ----------------------------------------------------- | -| `time` | ms | Current time when metric was recorded (Unix time) | -| `period` | ms | Duration of the metric period | -| `cpuUtilization` | % | CPU utilization percentage (user and system combined) | - -[^1]: The `userCPUTime` and `systemCPUTime` metrics are converted to milliseconds to match the other time-related metrics. 
diff --git a/docs/reference/architecture.md b/docs/reference/architecture.md deleted file mode 100644 index 4155d5ff..00000000 --- a/docs/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -Harper's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. - -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/docs/reference/blob.md b/docs/reference/blob.md deleted file mode 100644 index 57dd7081..00000000 --- a/docs/reference/blob.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Blob ---- - -# Blob - -Blobs are binary large objects that can be used to store any type of unstructured/binary data and is designed for large content. Blobs support streaming and feature better performance for content larger than about 20KB. Blobs are built off the native JavaScript `Blob` type, and HarperDB extends the native `Blob` type for integrated storage with the database. To use blobs, you would generally want to declare a field as a `Blob` type in your schema: - -```graphql -type MyTable { - id: Any! 
@primaryKey - data: Blob -} -``` - -You can then create a blob which writes the binary data to disk, and can then be included (as a reference) in a record. For example, you can create a record with a blob like: - -```javascript -let blob = createBlob(largeBuffer); -await MyTable.put({ id: 'my-record', data: blob }); -``` - -The `data` attribute in this example is a blob reference, and can be used like any other attribute in the record, but it is stored separately, and the data must be accessed asynchronously. You can retrieve the blob data with the standard `Blob` methods: - -```javascript -let buffer = await blob.bytes(); -``` - -If you are creating a resource method, you can return a `Response` object with a blob as the body: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - let record = super.get(target); - return { - status: 200, - headers: {}, - body: record.data, // record.data is a blob - }; - } -} -``` - -When using the exported REST APIs for your tables, blobs will by default be treated with a UTF-8 encoding and contain text/plain content. - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "Why hello there, world!" - }' -``` - -To store arbitrary binary content (such as audio data) in a blob, using CBOR is recommended when making API requests. This will let you control the contents of the blob precisely. - -If you need to use JSON, Base64 encoding your contents can be a great choice, but you'll need to do a bit of work to control the encoding of the underlying blob: - -```typescript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; - - create(target: RequestTarget, record: Partial) { - if (record.data) { - record.data = Buffer.from(record.data, 'base64'); - } - return super.create(target, record); - } -} -``` - -Now you can create records and they'll be encoded appropriately. 
For example, here's a small .jpg encoded in base64: - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAVgEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAeQAAAHAAAABDAyMjGRAQAHAAAABAECAwCgAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAAABCgAwAEAAAAAQAAABCkBgADAAAAAQAAAAAAAAAAAAD/2wCEAAEBAQEBAQIBAQIDAgICAwQDAwMDBAYEBAQEBAYHBgYGBgYGBwcHBwcHBwcICAgICAgJCQkJCQsLCwsLCwsLCwsBAgICAwMDBQMDBQsIBggLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLC//dAAQAAf/AABEIABAAEAMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APz68CaN8Mp/DWveJviDqE0R0qGIwWsGEaR532J83uwwABXH+MtP8N6Hryad4cvJrm3lgjlX7WES4R2zujcIAvy8YIHQ+1eYeKdAu9VtTNpUvk3aAeWSxCblOVJA4O08jIrR0/R1txDc37m4u0QK8p7tjkgdBmv2zD4apGvUq1KjcXtHTTRWP0nEUqzxcatKbUEkuWy5fN3+Lmvt0tp2t//Z" - }' -``` - -One of the important characteristics of blobs is they natively support asynchronous streaming of data. This is important for both creation and retrieval of large data. When we create a blob with `createBlob`, the returned blob will create the storage entry, but the data will be streamed to storage. This means that you can create a blob from a buffer or from a stream. You can also create a record that references a blob before the blob is fully written to storage. 
For example, you can create a blob from a stream: - -```javascript -let blob = createBlob(stream); -// at this point the blob exists, but the data is still being written to storage -await MyTable.put({ id: 'my-record', data: blob }); -// we now have written a record that references the blob -let record = await MyTable.get('my-record'); -// we now have a record that gives us access to the blob. We can asynchronously access the blob's data or stream the data, and it will be available as the stream is written to the blob. -let stream = record.data.stream(); -``` - -This can be powerful functionality for large media content, where content can be streamed into storage as it is streamed out in real-time to users as it is received, or even for web content where low latency transmission of data from origin is critical. However, this also means that blobs are _not_ atomic or [ACID](https://en.wikipedia.org/wiki/ACID) compliant; streaming functionality achieves the opposite behavior of ACID/atomic writes that would prevent access to data as it is being written, and wait until data is fully available before a commit. Alternately, we can also use the `saveBeforeCommit` flag to indicate that the blob should be fully written to storage before committing a transaction to ensure that the whole blob is available before the transaction commits and writes the record: - -```javascript -let blob = createBlob(stream, { saveBeforeCommit: true }); -// this put will not commit and resolve until the blob is written and then the record is written -await MyTable.put({ id: 'my-record', data: blob }); -``` - -Note that using `saveBeforeCommit` does not necessarily guarantee full ACID compliance. This can be combined with the `flush` flag to provide a stronger guarantee that a blob is flushed to disk before committing a transaction. 
However, the error handling below provides a stronger guarantee of proper blob handling when the process of streaming/writing a blob is interrupted and using proper error handling is recommended, instead of relying on `saveBeforeCommit`, for the best combination of reliability and performance. - -### Error Handling - -Because blobs can be streamed and referenced prior to their completion, there is a chance that an error or interruption could occur while streaming data to the blob (after the record is committed). We can create an error handler for the blob to handle the case of an interrupted blob: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - const record = super.get(target); - let blob = record.data; - blob.on('error', () => { - // if this was a caching table, we may want to invalidate or delete this record: - MyTable.invalidate(target); - // we may want to re-retrieve the blob - }); - return { - status: 200, - headers: {}, - body: blob - }; - } -} -``` - -### Blob `size` - -Blobs that are created from streams may not have the standard `size` property available, because the size may not be known while data is being streamed. Consequently, the `size` property may be undefined until the size is determined. You can listen for the `size` event to be notified when the size is available: - -```javascript -let record = await MyTable.get('my-record'); -let blob = record.data; -blob.size; // will be available if it was saved with a known size -let stream = blob.stream(); // start streaming the data -if (blob.size === undefined) { - blob.on('size', (size) => { - // will be called once the size is available - }); -} -``` - -### Blob Coercion - -When a field is defined to use the `Blob` type, any strings or buffers that are assigned to that field in a `put`, `patch`, or `publish`, will automatically be coerced to a `Blob`. 
This makes it easy to use a `Blob` type even with JSON data that may come from HTTP request bodies or MQTT messages, that do not natively support a `Blob` type. - -See the [configuration](../deployments/configuration) documentation for more information on configuring where blobs are stored. diff --git a/docs/reference/clustering/certificate-management.md b/docs/reference/clustering/certificate-management.md deleted file mode 100644 index a11a1a35..00000000 --- a/docs/reference/clustering/certificate-management.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that Harper generates are stored in your `/keys/`. - -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your Harper cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. 
- -Once you generate new certificates, to make Harper start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart Harper. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Revocation Checking - -Harper automatically performs certificate revocation checking using OCSP (Online Certificate Status Protocol) for all cluster connections. This critical security feature ensures that: - -- Revoked certificates cannot be used for cluster communication -- Compromised nodes can be quickly isolated by revoking their certificates -- Certificate status is verified in real-time with the Certificate Authority - -Certificate verification is enabled by default for cluster connections and follows the same configuration as HTTP mTLS connections. The verification settings can be customized in the HTTP configuration section to balance security requirements with performance considerations. - -For production clusters, consider using `failureMode: fail-closed` to ensure maximum security by rejecting connections when OCSP verification cannot be completed. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. 
-- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/docs/reference/clustering/creating-a-cluster-user.md b/docs/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 0a8b2a6c..00000000 --- a/docs/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. 
Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. - -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/docs/reference/clustering/enabling-clustering.md b/docs/reference/clustering/enabling-clustering.md deleted file mode 100644 index 606bc29c..00000000 --- a/docs/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. 
- -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/docs/reference/clustering/establishing-routes.md b/docs/reference/clustering/establishing-routes.md deleted file mode 100644 index 1d4d5ae2..00000000 --- a/docs/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. 
This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.6/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. 
- -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. - -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/docs/reference/clustering/index.md b/docs/reference/clustering/index.md deleted file mode 100644 index fddd3851..00000000 --- a/docs/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: NATS Clustering ---- - -# NATS Clustering - -Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. - -Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. 
Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. -- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. - -Harper simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces. 
diff --git a/docs/reference/clustering/managing-subscriptions.md b/docs/reference/clustering/managing-subscriptions.md deleted file mode 100644 index f043c9d1..00000000 --- a/docs/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Managing subscriptions ---- - -Tables are replicated when the table is designated as replicating and there is a subscription between the nodes. -Tables are designated as replicating by default, but can be changed by setting `replicate` to `false` in the table definition: - -```graphql -type Product @table(replicate: false) { - id: ID! - name: String! -} -``` - -Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases -should be replicated by default: - -```yaml -replication: - databases: data -``` - -If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate: - -```graphql -type Product @table(replicate: true) { - id: ID! - name: String! -} -``` - -Reading hdb_nodes (what we do _to_ the node, not what the node does). - -The subscription can be set to publish, subscribe, or both. - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `set_node_replication`. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "data", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "database": "data", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. 
Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/update while the others will be left untouched. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. - -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. 
- -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. - -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/docs/reference/clustering/naming-a-node.md b/docs/reference/clustering/naming-a-node.md deleted file mode 100644 index 7a512efb..00000000 --- a/docs/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. 
- -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/docs/reference/clustering/requirements-and-definitions.md b/docs/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 22bc3977..00000000 --- a/docs/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of Harper running. - -\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster. diff --git a/docs/reference/clustering/subscription-overview.md b/docs/reference/clustering/subscription-overview.md deleted file mode 100644 index b4827de7..00000000 --- a/docs/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. 
They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. _Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.6/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.6/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.6/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. 
- -![figure 5](/img/v4.6/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. diff --git a/docs/reference/clustering/things-worth-knowing.md b/docs/reference/clustering/things-worth-knowing.md deleted file mode 100644 index f523c7bf..00000000 --- a/docs/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -Harper clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. 
- -![](/img/v4.6/clustering/figure6.png) diff --git a/docs/reference/components/applications.md b/docs/reference/components/applications.md deleted file mode 100644 index 41210f38..00000000 --- a/docs/reference/components/applications.md +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: Applications ---- - -# Applications - -> The contents of this page predominantly relate to **application** components. Extensions are not necessarily _deployable_. The ambiguity of the term "components" is being worked on and will be improved in future releases. As we work to clarify the terminology, please keep in mind that the component operations are synonymous with application management. In general, "components" is the general term for both applications and extensions, but in context of the operations API it refers to applications only. - -Harper offers several approaches to managing applications that differ between local development and Harper managed instances. This page will cover the recommended methods of developing, installing, deploying, and running Harper applications. - -## Local Development - -Harper is designed to be simple to run locally. Generally, Harper should be installed locally on a machine using a global package manager install (i.e. `npm i -g harperdb`). - -> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/). - -When developing an application locally there are a number of ways to run it on Harper. - -### `dev` and `run` commands - -The quickest way to run an application is by using the `dev` command within the application directory. - -The `harperdb dev .` command will automatically watch for file changes within the application directory and restart the Harper threads when changes are detected. 
- -The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread. - -Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process. - -### Deploying to a local Harper instance - -Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead. - -1. Start up Harper with `harperdb` -1. _Deploy_ the application to the local instance by executing: - - ```sh - harperdb deploy \ - project= \ - package= \ - restart=true - ``` - - - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally - - The `package=` option creates a symlink to the application simplifying restarts - - By default, the `deploy` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly - - The `restart=true` option automatically restarts Harper threads after the application is deployed - - If set to `'rolling'`, a rolling restart will be triggered after the application is deployed - -1. In another terminal, use the `harperdb restart` command to restart the instance's threads at any time - - With `package=`, the application source is symlinked so changes will automatically be picked up between restarts - - If `package` was omitted, run the `deploy` command again with any new changes -1. To remove the application use `harperdb drop_component project=` - -Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the application deployed). Upon Harper startup, the application will automatically be loaded and executed across all threads. - -> Not all [component operations](../../developers/operations-api/components) are available via CLI. 
When in doubt, switch to using the Operations API via network requests to the local Harper instance. - -For example, to properly _deploy_ a `test-application` locally, the command would look like: - -```sh -harperdb deploy \ - project=test-application \ - package=/Users/dev/test-application \ - restart=true -``` - -> If the current directory is the application directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path. - -Keep in mind that using a local file path for `package` will only work locally; deploying to a remote instance requires a different approach. - -## Remote Management - -Managing applications on a remote Harper instance is best accomplished through [component operations](../../developers/operations-api/components), similar to using the `deploy` command locally. Before continuing, always backup critical Harper instances. Managing, deploying, and executing applications can directly impact a live system. - -Remote Harper instances work very similarly to local Harper instances. The primary application management operations still include `deploy_component`, `drop_component`, and `restart`. - -The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments. 
- -All together: - -```sh -harperdb deploy \ - project= \ - package= \ - username= \ - password= \ - target= \ - restart=true \ - replicated=true -``` - -Or, using environment variables: - -```sh -export CLI_TARGET_USERNAME= -export CLI_TARGET_PASSWORD= -harperdb deploy \ - project= \ - package= \ - target= \ - restart=true \ - replicated=true -``` - -Unlike local development where `package` should be set to a local file path for symlinking and improved development experience purposes, now it has some additional options. - -A local application can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation. - -Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies). - -- For applications deployed to npm, specify the package name: `package="@harperdb/status-check"` -- For applications on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check` -- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-applications.git"` - - Reference the [SSH Key](../../developers/operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance -- Even tarball URLs are supported: `package="https://example.com/application.tar.gz"` - -> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers. - -These `package` values are all supported because behind-the-scenes, Harper is generating a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. 
This is why symlinks are generated when specifying a file path locally. The following [Advanced](#advanced) section explores this pattern in more detail. - -Finally, don't forget to include `restart=true`, or run `harperdb restart target=`. - -## Dependency Management - -Naturally, applications may have dependencies. Since we operate on top of Node.js, we default to leveraging `npm` and `package.json` for dependency management. - -As already covered, there are a number of ways to run an application on Harper. From symlinking to a local directory, to deploying it via the `deploy_component` operation. Harper does its best to seamlessly run your application. - -During application loading, if an application directory contains a `node_modules` directory or it excludes a `package.json`, Harper will skip dependency installation. Otherwise, Harper will check the application's config (values specified in the `harperdb-config.yaml` file) for `install: { command, timeout }` fields (see the example below for more information). If it exists, Harper will use the specified command to install dependencies. If not, then Harper will attempt to derive the package manager from the [`package.json#devEngines#packageManager`](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#devengines) field (which can specify an npm alternate like yarn or pnpm). Finally, if no package manager or install command could be derived, Harper will default to using `npm install`. - -The Application operations [`add_component`](../../developers/operations-api/components.md#add-component) and [`deploy_component`](../../developers/operations-api/components.md#deploy-component) support customizing the install command (and timeout) through the `install_command` and `install_timeout` fields. - -If you plan to use an alternative package manager than `npm`, ensure it is installed and configured on the host machine. 
Harper does not currently support the `"onFail": "download"` option in `package.json#devEngines#packageManager` and will fallback to `"onFail": "error"` behavior. - -### Example `harperdb-config.yaml` - -```yaml -myApp: - package: ./my-app - install: - command: yarn install - timeout: 600000 # 10 minutes -``` - -### Example `package.json` - -```json -{ - "name": "my-app", - "version": "1.0.0", - "devEngines": { - "packageManager": { - "name": "pnpm", - "onFail": "error" - } - } -} -``` - -## Advanced - -The following methods are advanced and should be executed with caution as they can have unintended side-effects. Always backup any critical Harper instances before continuing. - -First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field. - -> For a useful shortcut on POSIX compliant machines run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"/g'` - -This path is the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path will be `/components` by default (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path. - -### Adding components to root - -Similar to how components can specify other components within their `config.yaml`, applications can be added to Harper by adding them to the `harperdb-config.yaml`. - -The configuration is very similar to that of `config.yaml`. Entries are comprised of a top-level `:`, and an indented `package: ` field. Any additional component options can also be included as indented fields. 
- -```yaml -status-check: - package: '@harperdb/status-check' -``` - -The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. Thus, the `package: ` can be any valid dependency syntax such as npm packages, GitHub repos, tarballs, and local directories are all supported. - -Given a root config like: - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from npm -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -Harper will generate a `package.json` like: - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -npm will install all the components and store them in ``. A symlink back to `/node_modules` is also created for dependency resolution purposes. - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -By specifying a file path, npm will generate a symlink and then changes will be automatically picked up between restarts. 
diff --git a/docs/reference/components/built-in-extensions.md b/docs/reference/components/built-in-extensions.md deleted file mode 100644 index 81cb456e..00000000 --- a/docs/reference/components/built-in-extensions.md +++ /dev/null @@ -1,319 +0,0 @@ ---- -title: Built-In Extensions ---- - -# Built-In Extensions - -Harper provides extended features using built-in extensions. They do **not** need to be installed with a package manager, and simply must be specified in a config to run. These are used throughout many Harper docs, guides, and examples. Unlike custom extensions which have their own semantic versions, built-in extensions follow Harper's semantic version. - -For more information read the [Components, Applications, and Extensions](../../developers/applications/) documentation section. - -- [Built-In Extensions](#built-in-extensions) - - [dataLoader](#dataloader) - - [fastifyRoutes](#fastifyroutes) - - [graphql](#graphql) - - [graphqlSchema](#graphqlschema) - - [jsResource](#jsresource) - - [loadEnv](#loadenv) - - [rest](#rest) - - [roles](#roles) - - [static](#static) - - [Options](#options) - - [Examples](#examples) - - [Basic Static File Serving](#basic-static-file-serving) - - [Enable automatic `index.html` serving](#enable-automatic-indexhtml-serving) - - [Enable automatic `.html` extension matching](#enable-automatic-html-extension-matching) - - [Provide a custom `404 Not Found` page](#provide-a-custom-404-not-found-page) - - [Fully customize not found response](#fully-customize-not-found-response) - -## dataLoader - -Load data from JSON or YAML files into Harper tables as part of component deployment. - -This component is an [Extension](..#extensions) and can be configured with the `files` configuration option. 
- -Complete documentation for this feature is available here: [Data Loader](../../developers/applications/data-loader) - -```yaml -dataLoader: - files: 'data/*.json' -``` - -## fastifyRoutes - -Specify custom endpoints using [Fastify](https://fastify.dev/). - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Define Fastify Routes](../../developers/applications/define-routes) - -```yaml -fastifyRoutes: - files: 'routes/*.js' -``` - -## graphql - -> GraphQL querying is **experimental**, and only partially implements the GraphQL Over HTTP / GraphQL specifications. - -Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification. - -Complete documentation for this feature is available here: [GraphQL](../graphql) - -```yaml -graphql: true -``` - -## graphqlSchema - -Specify schemas for Harper tables and resources via GraphQL schema syntax. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Schemas](../../developers/applications/defining-schemas) - -```yaml -graphqlSchema: - files: 'schemas.graphql' -``` - -## jsResource - -Specify custom, JavaScript based Harper resources. - -Refer to the Application [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) guide, or [Resource Class](../resources/) reference documentation for more information on custom resources. 
- -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -```yaml -jsResource: - files: 'resource.js' -``` - -## loadEnv - -Load environment variables via files like `.env`. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Ensure this component is specified first in `config.yaml` so that environment variables are loaded prior to loading any other components. - -```yaml -loadEnv: - files: '.env' -``` - -This component matches the default behavior of dotenv where existing variables take precedence. Specify the `override` option in order to override existing environment variables assigned to `process.env`: - -```yaml -loadEnv: - files: '.env' - override: true -``` - -> Important: Harper is a single process application. Environment variables are loaded onto `process.env` and will be shared throughout all Harper components. This means environment variables loaded by one component will be available on other components (as long as the components are loaded in the correct order). - - - - - - - - - -## rest - -Enable automatic REST endpoint generation for exported resources with this component. - -Complete documentation for this feature is available here: [REST](../../developers/rest) - -```yaml -rest: true -``` - -This component contains additional options: - -To enable `Last-Modified` header support: - -```yaml -rest: - lastModified: true -``` - -To disable automatic WebSocket support: - -```yaml -rest: - webSocket: false -``` - -## roles - -Specify roles for Harper tables and resources. 
- -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Roles](../../developers/applications/defining-roles) - -```yaml -roles: - files: 'roles.yaml' -``` - -## static - -Serve static files via HTTP. - -Use the [Resource Extension](./extensions#resource-extension) configuration options [`files` and `urlPath`](./extensions#resource-extension-configuration) to specify the files to be served. - -``` -my-app/ -├─ site/ -│ ├─ index.html -│ ├─ about.html -│ ├─ blog/ -│ ├─ post-1.html -│ ├─ post-2.html -├─ config.yaml -``` - -The `static` plugin can be configured to serve the `site/` directory by specifying: - -```yaml -static: - files: 'site/**' -``` - -Then you could access the files relative to the `site` directory, thus `GET localhost:9926/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/blog/post-1.html` would return the contents of `site/blog/post-1.html`. - -You can use the `urlPath` option to serve the files from a different URL path, for example: - -```yaml -static: - files: 'site/**' - urlPath: 'app' -``` - -Now, `GET localhost:9926/app/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/app/blog/post-1.html` would return the contents of `site/blog/post-1.html`. - -Moreover, if the `site/` directory was nested another level, such as: - -``` -my-app/ -├─ site/ -│ ├─ pages/ -│ ├─ index.html -│ ├─ about.html -│ ├─ blog/ -│ ├─ post-1.html -│ ├─ post-2.html -│ ├─ cache-info/ -│ ├─ index.json -│ ├─ about.json -│ ├─ ... 
-├─ config.yaml -``` - -Now a pattern such as `site/pages/**` will match all files within the `pages` directory (including subdirectories) so a request to `GET localhost:9926/index.html` will return the contents of `site/pages/index.html`, and `GET localhost:9926/blog/post-1.html` will return the contents of `site/pages/blog/post-1.html`. - -Because this plugin is implemented using the new [Plugin API](./plugins.md), it automatically updates to application changes. From updating the `config.yaml` to adding, removing, or modifying files, everything is handled automatically and Harper should **not** require a restart. - -### Options - -In addition to the general Plugin configuration options (`files`, `urlPath`, and `timeout`), this plugin supports the following configuration options: - -- `extensions` - `string[]` - _optional_ - An array of file extensions to try and serve when an exact path is not found. For example, `['html']` and the path `/site/page-1` will match `/site/page-1.html`. -- `fallthrough` - `boolean` - _optional_ - If `true`, the plugin will fall through to the next handler if the requested file is not found. Make sure to disable this option if you want to customize the 404 Not Found response with the `notFound` option. Defaults to `true`. -- `index` - `boolean` - _optional_ - If `true`, the plugin will serve an `index.html` file if it exists in the directory specified by the `files` pattern. Defaults to `false`. -- `notFound` - `string | { file: string; statusCode: number }` - _optional_ - Specify a custom file to be returned for 404 Not Found responses. If you want to specify a different statusCode when a given path cannot be found, use the object form and specify the `file` and `statusCode` properties (this is particularly useful for SPAs). - -### Examples - -The `static` plugin can be configured in various ways to provide different behaviors. 
Here are some common examples: - -#### Basic Static File Serving - -Serve all files contained within the `static/` directory as is. - -```yaml -static: - files: 'static/**' -``` - -Requests must match the file names exactly (relative to the `static/` directory). - -#### Enable automatic `index.html` serving - -Serve all files contained within the `static/` directory, and automatically serve an `index.html` file if it exists in the directory. - -```yaml -static: - files: 'static/**' - index: true -``` - -Now given a directory structure like: - -``` -my-app/ -├─ static/ -│ ├─ index.html -│ ├─ blog/ -│ ├─ index.html -│ ├─ post-1.html -``` - -Requests would map like: - -``` -GET / -> static/index.html -GET /blog -> static/blog/index.html -GET /blog/post-1.html -> static/blog/post-1.html -``` - -#### Enable automatic `.html` extension matching - -Expanding on the previous example, if you specify the `extensions` option, the plugin will automatically try to match the requested path with the specified extensions. - -```yaml -static: - files: 'static/**' - index: true - extensions: ['html'] -``` - -Now with the same directory structure, requests would map like: - -``` -GET / -> static/index.html -GET /blog -> static/blog/index.html -GET /blog/post-1 -> static/blog/post-1.html -``` - -#### Provide a custom `404 Not Found` page - -Sometimes when a `404 Not Found` response is not sufficient, and you want to provide a custom page or resource, you can use the `notFound` option to specify a custom file to be returned when a requested path is not found. - -```yaml -static: - files: 'static/**' - notFound: 'static/404.html' -``` - -Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/404.html` with a `404` status code. - -#### Fully customize not found response - -Most common in SPAs relying on client-side routing, you may want to override the default `404` status code when a path is not found. 
- -You can do this by specifying the `notFound` option as an object with a `file` and `statusCode` property. - -```yaml -static: - files: 'static/**' - notFound: - file: 'static/index.html' - statusCode: 200 -``` - -Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/index.html` with a `200` status code. This is particularly useful for SPAs where you want to serve the main application file regardless of the requested path. diff --git a/docs/reference/components/configuration.md b/docs/reference/components/configuration.md deleted file mode 100644 index 2175a03d..00000000 --- a/docs/reference/components/configuration.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Component Configuration ---- - -# Component Configuration - -> For information on the distinction between the types of components (applications and extensions), refer to beginning of the [Applications](../../developers/applications) documentation section. - -Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how an components configures other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it. - -```yaml -name: - option-1: value - option-2: value -``` - -It is the entry's `name` that is used for component resolution. It can be one of the [built-in extensions](./built-in-extensions), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples. - -For some built-in extensions they can be configured with as little as a top-level boolean; for example, the [rest](./built-in-extensions#rest) extension can be enabled with just: - -```yaml -rest: true -``` - -Most components generally have more configuration options. 
Some options are ubiquitous to the Harper platform, such as the `files` and `urlPath` options for an [extension](./extensions) or [plugin](./plugins), or `package` for any [custom component](#custom-component-configuration). - -[Extensions](./extensions) and [plugins](./plugins) require specifying the `extensionModule` or `pluginModule` option respectively. Refer to their respective API reference documentation for more information. - -## Custom Component Configuration - -Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`: - -```json -{ - "dependencies": { - "@harperdb/nextjs": "1.0.0" - } -} -``` - -Then, within `config.yaml` it can be enabled and configured using: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - # ... -``` - -Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`: - -```json -{ - "dependencies": { - "harper-nextjs-test-feature": "HarperDB/nextjs#test-feature" - } -} -``` - -And now in `config.yaml`: - -```yaml -harper-nextjs-test-feature: - package: '@harperdb/nextjs' - files: './' - # ... -``` - -## Default Component Configuration - -Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components. - -```yaml -rest: true -graphqlSchema: - files: '*.graphql' -roles: - files: 'roles.yaml' -jsResource: - files: 'resources.js' -fastifyRoutes: - files: 'routes/*.js' - urlPath: '.' -static: - files: 'web/**' -``` - -Refer to the [built-in components](./built-in-extensions) documentation for more information on these fields. 
- -If a `config.yaml` is defined, it will **not** be merged with the default config. diff --git a/docs/reference/components/extensions.md b/docs/reference/components/extensions.md deleted file mode 100644 index 78012b7b..00000000 --- a/docs/reference/components/extensions.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -title: Extensions API ---- - -# Extensions API - -> As of Harper v4.6, a new iteration of the extension API was released called **Plugins**. They are simultaneously a simplification and an extensibility upgrade. Plugins are **experimental**, but we encourage developers to consider developing with the [plugin API](./plugins) instead of the extension API. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -There are two key types of Extensions: **Resource Extension** and **Protocol Extensions**. The key difference is a **Protocol Extensions** can return a **Resource Extension**. - -Furthermore, what defines an extension separately from a component is that it leverages any of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs. - -All extensions must define a `config.yaml` file and declare an `extensionModule` option. This must be a path to the extension module source code. The path must resolve from the root of the module directory. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`. - -If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `extensionModule: ./dist/index.js`) - -## Resource Extension - -A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in-extensions#jsresource) extension handles executing JavaScript files. 
- -Resource Extensions are comprised of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-absolutepath-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-absolutepath-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence. - -> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shutdown and turned back on. - -Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and `handleDirectory()` and `setupDirectory()` methods have identical function definitions (arguments and return value behavior). - -### Resource Extension Configuration - -Any [Resource Extension](#resource-extension) can be configured with the `files` and `urlPath` options. These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods. - -> Harper relies on the [fast-glob](https://github.com/mrmlnc/fast-glob) library for glob pattern matching. 
- -- `files` - `string | string[] | Object` - _required_ - A [glob pattern](https://github.com/mrmlnc/fast-glob?tab=readme-ov-file#pattern-syntax) string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the extension. If specified as an object, the `source` property is required. By default, Harper **matches files and directories**; this is configurable using the `only` option. - - `source` - `string | string[]` - _required_ - The glob pattern string or array of strings. - - `only` - `'all' | 'files' | 'directories'` - _optional_ - The glob pattern will match only the specified entry type. Defaults to `'all'`. - - `ignore` - `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - - If the value starts with `./`, such as `'./static/'`, the component name will be included in the base url path - - If the value is `.`, then the component name will be the base url path - - Note: `..` is an invalid pattern and will result in an error - - Otherwise, the value here will be base url path. Leading and trailing `/` characters will be handled automatically (`/static/`, `/static`, and `static/` are all equivalent to `static`) - -For example, to configure the [static](./built-in-extensions#static) component to serve all HTML files from the `web` source directory on the `static` URL endpoint: - -```yaml -static: - files: 'web/*.html' - urlPath: 'static' -``` - -If there are files such as `web/index.html` and `web/blog.html`, they would be available at `localhost/static/index.html` and `localhost/static/blog.html` respectively. 
- -Furthermore, if the component is located in the `test-component` directory, and the `urlPath` was set to `'./static/'` instead, then the files would be served from `localhost/test-component/static/*` instead. - -The `urlPath` is optional, for example to configure the [graphqlSchema](./built-in-extensions#graphqlschema) component to load all schemas within the `src/schema` directory, only specifying a `files` glob pattern is required: - -```yaml -graphqlSchema: - files: 'src/schema/*.schema' -``` - -The `files` option also supports a more complex options object. These additional fields enable finer control of the glob pattern matching. - -For example, to match files within `web`, and omit any within the `web/images` directory, the configuration could be: - -```yaml -static: - files: - source: 'web/**/*' - ignore: ['web/images'] -``` - -In order to match only files: - -```yaml -test-component: - files: - source: 'dir/**/*' - only: 'files' -``` - -### Resource Extension API - -In order for an extension to be classified as a Resource Extension it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. For example: - -```js -// ESM -export function handleFile() {} -export function setupDirectory() {} - -// or CJS -function handleDirectory() {} -function setupFile() {} - -module.exports = { handleDirectory, setupFile }; -``` - -When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead: - -```js -export function start() { - return { - handleFile() {}, - }; -} -``` - -#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise` - -#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise` - -These methods are for processing individual files. They can be async. - -> Remember! 
-> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `contents` - `Buffer` - The contents of the file -- `urlPath` - `string` - The recommended URL path of the file -- `absolutePath` - `string` - The absolute path of the file - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `void | Promise` - -#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise` - -#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise` - -These methods are for processing directories. They can be async. - -If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed. - -> Remember! -> -> `setupDirectory()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleDirectory()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `urlPath` - `string` - The recommended URL path of the directory -- `absolutePath` - `string` - The absolute path of the directory - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `boolean | void | Promise` - -## Protocol Extension - -A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../globals#server) global API documentation for more information). 
- -### Protocol Extension Configuration - -In addition to the `files` and `urlPath` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. Any options added to the extension configuration (in `config.yaml`), will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple options that can be included in its configuration. For example, a Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - files: './' - prebuilt: true - dev: false -``` - -Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../globals#server) global APIs accept `port` and `securePort` options, so components replicate this for simpler pass-through. - -### Protocol Extension API - -A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_, and _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods have an identical `options` object parameter, and can both return a Resource Extension (i.e. an object containing one or more of the methods listed above). 
- -#### `start(options): ResourceExtension | Promise` - -#### `startOnMainThread(options): ResourceExtension | Promise` - -Parameters: - -- `options` - `Object` - An object representation of the extension's configuration options. - -Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api) diff --git a/docs/reference/components/index.md b/docs/reference/components/index.md deleted file mode 100644 index 30ce276d..00000000 --- a/docs/reference/components/index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Components ---- - -# Components - -**Components** are the high-level concept for modules that extend the Harper core platform adding additional functionality. Components encapsulate both applications and extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. - -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. - -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. 
Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -For more information on the differences between applications and extensions, refer to the beginning of the [Applications](../developers/applications/) guide documentation section. - -This technical reference section has detailed information on various component systems: - -- [Built-In Extensions](components/built-in-extensions) -- [Configuration](components/configuration) -- [Managing Applications](components/applications) -- [Extensions](components/extensions) -- [(Experimental) Plugins](components/plugins) - -## Custom Applications - -- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) -- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) -- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) - -## Custom Extensions - -- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) -- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) -- [`@harperdb/astro`](https://github.com/HarperDB/astro) diff --git a/docs/reference/components/plugins.md b/docs/reference/components/plugins.md deleted file mode 100644 index 7ce1d3c3..00000000 --- a/docs/reference/components/plugins.md +++ /dev/null @@ -1,629 +0,0 @@ ---- -title: Experimental Plugins ---- - -# Experimental Plugins - -The new, experimental **plugin** 
- API is an iteration of the existing extension system. It simplifies the API by removing the need for multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, etc.) and instead only requires a single `handleApplication` method. Plugins are designed to be more extensible and easier to use, and they are intended to replace the concept of extensions in the future. - -Similar to the existing extension API, a plugin must specify a `pluginModule` option within `config.yaml`. This must be a path to the plugin module source code. The path must resolve from the root of the module directory. For example: `pluginModule: plugin.js`. - -If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `pluginModule: ./dist/index.js`) - -It is also recommended that all plugins have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since plugins are just JavaScript packages, they can do anything a JavaScript package can normally do. It can be written in TypeScript, and compiled to JavaScript. It can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). It can be published to npm. The possibilities are endless! - -The key to a plugin is the [`handleApplication()`](#function-handleapplicationscope-scope-void--promisevoid) method. It must be exported by the `pluginModule`, and cannot coexist with any of the other extension methods such as `start`, `handleFile`, etc. The component loader will throw an error if both are defined. - -The `handleApplication()` method is executed **sequentially** across all **worker threads** during the component loading sequence. It receives a single `scope` argument that contains all of the relevant metadata and APIs for interacting with the associated component. - -The method can be async and it is awaited by the component loader. 
- -However, it is highly recommended to avoid event-loop-blocking operations within the `handleApplication()` method. See the examples section for best practices on how to use the `scope` argument effectively. - -## Configuration - -As plugins are meant to be used by applications in order to implement some feature, many plugins provide a variety of configuration options to customize their behavior. Some plugins even require certain configuration options to be set in order to function properly. - -As a brief overview, the general configuration options available for plugins are: - -- `files` - `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - _optional_ - A glob pattern string or array of strings that specifies the files and directories to be handled by the plugin's default `EntryHandler` instance. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries handled by the plugin's default `EntryHandler` instance. -- `timeout` - `number` - _optional_ - The timeout in milliseconds for the plugin's operations. If not specified, the system default is **30 seconds**. Plugins may override the system default themselves, but this configuration option is the highest priority and takes precedence. - -### File Entries - -Just like extensions, plugins support the `files` and `urlPath` options for file entry matching. The values specified for these options are used for the default `EntryHandler` instance created by the `scope.handleEntry()` method. As the reference documentation details, similar options can be used to create custom `EntryHandler` instances too. - -The `files` option can be a glob pattern string, an array of glob pattern strings, or a more expressive glob options object. - -- The patterns **cannot** contain `..` or start with `/`. -- The pattern `.` or `./` is transformed into `**/*` automatically. -- Often, it is best to omit a leading `.` or `./` in the glob pattern. 
- -The `urlPath` option is a base URL path that is prepended to the resolved `files` entries. - -- It **cannot** contain `..`. -- If it starts with `./` or is just `.`, the name of the plugin will be automatically prepended to it. - -Putting this all together, to configure the [static](./built-in-extensions#static) built-in extension to serve files from the `web` directory but at the `/static/` path, the `config.yaml` would look like this: - -```yaml -static: - files: 'web/**/*' - urlPath: '/static/' -``` - -Keep in mind the `urlPath` option is completely optional. - -As another example, to configure the [graphqlSchema](./built-in-extensions#graphqlschema) built-in extension to serve only `*.graphql` files from within the top-level of the `src/schema` directory, the `config.yaml` would look like this: - -```yaml -graphqlSchema: - files: 'src/schema/*.graphql' -``` - -As detailed, the `files` option also supports a more complex object syntax for advanced use cases. - -For example, to match files within the `web` directory, and omit any within `web/images`, you can use a configuration such as: - -```yaml -static: - files: - source: 'web/**/*' - ignore: 'web/images/**' -``` - -> If you're transitioning from the [extension](./extensions) system, the `files` option object no longer supports an `only` field. Instead, use the `entryEvent.entryType` or the specific `entryEvent.eventType` fields in [`onEntryEventHandler(entryEvent)`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) method or any of the specific [`EntryHandler`](#class-entryhandler) events. - -### Timeouts - -The default timeout for all plugins is **30 seconds**. If the method does not complete within this time, the component loader will throw an error and unblock the component loading sequence. This is to prevent the component loader from hanging indefinitely if a plugin fails to respond or takes too long to execute. 
- -The plugin module can export a `defaultTimeout` variable (in milliseconds) that will override the system default. - -For example: - -```typescript -export const defaultTimeout = 60_000; // 60 seconds -``` - -Additionally, users can specify a `timeout` option in their application's `config.yaml` file for a specific plugin. This option takes precedence over the plugin's `defaultTimeout` and the system default. - -For example: - -```yaml -customPlugin: - package: '@harperdb/custom-plugin' - files: 'foo.js' - timeout: 45_000 # 45 seconds -``` - -## Example: Statically hosting files - -This is a functional example of how the `handleApplication()` method and `scope` argument can be used to create a simple static file server plugin. This example assumes that the component has a `config.yaml` with the `files` option set to a glob pattern that matches the files to be served. - -> This is a simplified form of the [static](./built-in-extensions#static) built-in extension. - -```js -export function handleApplication(scope) { - const staticFiles = new Map(); - - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files' || key[0] === 'urlPath') { - // If the files or urlPath options change, we need to reinitialize the static files map - staticFiles.clear(); - logger.info(`Static files reinitialized due to change in ${key.join('.')}`); - } - }); - - scope.handleEntry((entry) => { - if (entry.entryType === 'directory') { - logger.info(`Cannot serve directories. 
Update the files option to only match files.`); - return; - } - - switch (entry.eventType) { - case 'add': - case 'change': - // Store / Update the file contents in memory for serving - staticFiles.set(entry.urlPath, entry.contents); - break; - case 'unlink': - // Remove the file from memory when it is deleted - staticFiles.delete(entry.urlPath); - break; - } - }); - - scope.server.http( - (req, next) => { - if (req.method !== 'GET') return next(req); - - // Attempt to retrieve the requested static file from memory - const staticFile = staticFiles.get(req.pathname); - - return staticFile - ? { - statusCode: 200, - body: staticFile, - } - : { - statusCode: 404, - body: 'File not found', - }; - }, - { runFirst: true } - ); -} -``` - -In this example, the entry handler method passed to `handleEntry` will manage the map of static files in memory using their computed `urlPath` and the `contents`. If the config file changes (and thus a new default file or url path is specified) the plugin will clear the file map as well to remove artifacts. Furthermore, it uses the `server.http()` middleware to hook into the HTTP request handling. - -This example is heavily simplified, but it demonstrates how the different key parts of `scope` can be used together to provide a performant and reactive application experience. - -## API - -### TypeScript support - -The classes and types referenced below are all exported by the `harperdb` package. Just import the ones you need like this: - -```typescript -import { Scope, type Config } from 'harperdb'; -``` - -### Function: `handleApplication(scope: Scope): void | Promise` - -Parameters: - -- `scope` - [`Scope`](#class-scope) - An instance of the `Scope` class that provides access to the relative application's configuration, resources, and other APIs. - -Returns: `void | Promise` - -This is the only method a plugin module must export. It can be async and is awaited by the component loader. 
The `scope` argument provides access to the relative application's configuration, resources, and other APIs. - -### Class: `Scope` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -#### Event: `'close'` - -Emitted after the scope is closed via the `close()` method. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -#### Event: `'ready'` - -Emitted when the Scope is ready to be used after loading the associated config file. It is awaited by the component loader, so it is not necessary to await it within the `handleApplication()` method. - -#### `scope.close()` - -Returns: `this` - The current `Scope` instance. - -Closes all associated entry handlers, the associated `scope.options` instance, emits the `'close'` event, and then removes all other listeners on the instance. - -#### `scope.handleEntry([files][, handler])` - -Parameters: - -- `files` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) | [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ -- `handler` - [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ - -Returns: [`EntryHandler`](#class-entryhandler) - An instance of the `EntryHandler` class that can be used to handle entries within the scope. - -The `handleEntry()` method is the key to handling file system entries specified by a `files` glob pattern option in `config.yaml`. This method is used to register an entry event handler, specifically for the `EntryHandler` [`'all'`](#event-all) event. The method signature is very flexible, and allows for the following variations: - -- `scope.handleEntry()` (with no arguments) Returns the default `EntryHandler` created by the `files` and `urlPath` options in the `config.yaml`. 
-- `scope.handleEntry(handler)` (where `handler` is an `onEntryEventHandler`) Returns the default `EntryHandler` instance (based on the options within `config.yaml`) and uses the provided `handler` for the [`'all'`](#event-all) event. -- `scope.handleEntry(files)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`) Returns a new `EntryHandler` instance that handles the specified `files` configuration. -- `scope.handleEntry(files, handler)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`, and `handler` is an `onEntryEventHandler`) Returns a new `EntryHandler` instance that handles the specified `files` configuration and uses the provided `handler` for the [`'all'`](#event-all) event. - -For example: - -```js -export function handleApplication(scope) { - // Get the default EntryHandler instance - const defaultEntryHandler = scope.handleEntry(); - - // Assign a handler for the 'all' event on the default EntryHandler - scope.handleEntry((entry) => { - /* ... */ - }); - - // Create a new EntryHandler for the 'src/**/*.js' files option with a custom `'all'` event handler. - const customEntryHandler = scope.handleEntry( - { - files: 'src/**/*.js', - }, - (entry) => { - /* ... */ - } - ); - - // Create another custom EntryHandler for the 'src/**/*.ts' files option, but without a `'all'` event handler. - const anotherCustomEntryHandler = scope.handleEntry({ - files: 'src/**/*.ts', - }); -} -``` - -And thus, if the previous code was used by a component with the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -``` - -Then the default `EntryHandler` instances would be created to handle all entries within the `web` directory. - -#### `scope.requestRestart()` - -Returns: `void` - -Request a Harper restart. This **does not** restart the instance immediately, but rather indicates to the user that a restart is required. 
- This should be called when the plugin cannot handle the entry event and wants to indicate to the user that the Harper instance should be restarted. - -This method is called automatically by the `scope` instance if the user has not defined a `scope.options.on('change')` handler or if an event handler exists and is missing a necessary handler method. - -#### `scope.resources` - -Returns: `Map` - A map of the currently loaded [Resource](../globals#resource) instances. - -#### `scope.server` - -Returns: `server` - A reference to the [server](../globals#server) global API. - -#### `scope.options` - -Returns: [`OptionsWatcher`](#class-optionswatcher) - An instance of the `OptionsWatcher` class that provides access to the application's configuration options. Emits `'change'` events when the respective plugin part of the component's config file is modified. - -For example, if the plugin `customPlugin` is configured by an application with: - -```yaml -customPlugin: - files: 'foo.js' -``` - -And has the following `handleApplication(scope)` implementation: - -```typescript -export function handleApplication(scope) { - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files') { - // Handle the change in the files option - scope.logger.info(`Files option changed to: ${value}`); - } - }); -} -``` - -Then modifying the `files` option in the `config.yaml` to `bar.js` would log the following: - -```plaintext -Files option changed to: bar.js -``` - -#### `scope.logger` - -Returns: `logger` - A scoped instance of the [`logger`](../globals#logger) class that provides logging capabilities for the plugin. - -It is recommended to use this instead of the `logger` global. - -#### `scope.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -#### `scope.directory` - -Returns: `string` - The directory of the application. 
- This is the root directory of the component where the `config.yaml` file is located. - -### Interface: `FilesOption` - -- `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - -### Interface: `FilesOptionObject` - -- `source` - `string` | `string[]` - _required_ - The glob pattern string or array of strings. -- `ignore` - `string` | `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. - -### Interface: `FileAndURLPathConfig` - -- `files` - [`FilesOption`](#interface-filesoption) - _required_ - A glob pattern string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the plugin. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - -### Class: `OptionsWatcher` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -#### Event: `'change'` - -- `key` - `string[]` - The key of the changed option split into parts (e.g. `foo.bar` becomes `['foo', 'bar']`). -- `value` - [`ConfigValue`](#interface-configvalue) - The new value of the option. -- `config` - [`ConfigValue`](#interface-configvalue) - The entire configuration object of the plugin. - -The `'change'` event is emitted whenever a configuration option is changed in the configuration file relative to the application and respective plugin. - -Given an application using the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -otherPlugin: - files: 'index.js' -``` - -The `scope.options` for the respective plugins `customPlugin` and `otherPlugin` would emit `'change'` events when the `files` options relative to them are modified. 
- -For example, if the `files` option for `customPlugin` is changed to `web/**/*.js`, the following event would be emitted _only_ within the `customPlugin` scope: - -```js -scope.options.on('change', (key, value, config) => { - key; // ['files'] - value; // 'web/**/*.js' - config; // { files: 'web/**/*.js' } -}); -``` - -#### Event: `'close'` - -Emitted when the `OptionsWatcher` is closed via the `close()` method. The watcher is not usable after this event is emitted. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -#### Event: `'ready'` - -- `config` - [`ConfigValue`](#interface-configvalue) | `undefined` - The configuration object of the plugin, if present. - -This event can be emitted multiple times. It is first emitted upon the initial load, but will also be emitted after restoring a configuration file or configuration object after a `'remove'` event. - -#### Event: `'remove'` - -The configuration was removed. This can happen if the configuration file was deleted, the configuration object within the file is deleted, or if the configuration file fails to parse. Once restored, the `'ready'` event will be emitted again. - -#### `options.close()` - -Returns: `this` - The current `OptionsWatcher` instance. - -Closes the options watcher, removing all listeners and preventing any further events from being emitted. The watcher is not usable after this method is called. - -#### `options.get(key)` - -Parameters: - -- `key` - `string[]` - The key of the option to get, split into parts (e.g. `foo.bar` is represented as `['foo', 'bar']`). - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -If the config is defined it will attempt to retrieve the value of the option at the specified key. If the key does not exist, it will return `undefined`. - -#### `options.getAll()` - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -Returns the entire configuration object for the plugin. 
If the config is not defined, it will return `undefined`. - -#### `options.getRoot()` - -Returns: [`Config`](#interface-config) | `undefined` - -Returns the root configuration object of the application. This is the entire configuration object, basically the parsed form of the `config.yaml`. If the config is not defined, it will return `undefined`. - -#### Interface: `Config` - -- `[key: string]` [`ConfigValue`](#interface-configvalue) - -An object representing the `config.yaml` file configuration. - -#### Interface: `ConfigValue` - -- `string` | `number` | `boolean` | `null` | `undefined` | `ConfigValue[]` | [`Config`](#interface-config) - -Any valid configuration value type. Essentially, the primitive types, an array of those types, or an object comprised of values of those types. - -### Class: `EntryHandler` - -Extends: [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -Created by calling [`scope.handleEntry()`](#scopehandleentry) method. - -#### Event: `'all'` - -- `entry` - [`FileEntry`](#interface-fileentry) | [`DirectoryEntry`](#interface-directoryentry) - The entry that was added, changed, or removed. - -The `'all'` event is emitted for all entry events, including file and directory events. This is the event that the handler method in `scope.handleEntry` is registered for. The event handler receives an `entry` object that contains the entry metadata, such as the file contents, URL path, and absolute path. 
- -An effective pattern for this event is: - -```js -async function handleApplication(scope) { - scope.handleEntry((entry) => { - switch (entry.eventType) { - case 'add': - // Handle file addition - break; - case 'change': - // Handle file change - break; - case 'unlink': - // Handle file deletion - break; - case 'addDir': - // Handle directory addition - break; - case 'unlinkDir': - // Handle directory deletion - break; - } - }); -} -``` - -#### Event: `'add'` - -- `entry` - [`AddFileEvent`](#interface-addfileevent) - The file entry that was added. - -The `'add'` event is emitted when a file is created (or the watcher sees it for the first time). The event handler receives an `AddFileEvent` object that contains the file contents, URL path, absolute path, and other metadata. - -#### Event: `'addDir'` - -- `entry` - [`AddDirectoryEvent`](#interface-adddirectoryevent) - The directory entry that was added. - -The `'addDir'` event is emitted when a directory is created (or the watcher sees it for the first time). The event handler receives an `AddDirectoryEvent` object that contains the URL path and absolute path of the directory. - -#### Event: `'change'` - -- `entry` - [`ChangeFileEvent`](#interface-changefileevent) - The file entry that was changed. - -The `'change'` event is emitted when a file is modified. The event handler receives a `ChangeFileEvent` object that contains the updated file contents, URL path, absolute path, and other metadata. - -#### Event: `'close'` - -Emitted when the entry handler is closed via the [`entryHandler.close()`](#entryhandlerclose) method. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -#### Event: `'ready'` - -Emitted when the entry handler is ready to be used. This is not automatically awaited by the component loader, but also is not required. Calling `scope.handleEntry()` is perfectly sufficient. 
This is generally useful if you need to do something _after_ the entry handler is absolutely watching and handling entries. - -#### Event: `'unlink'` - -- `entry` - [`UnlinkFileEvent`](#interface-unlinkfileevent) - The file entry that was deleted. - -The `'unlink'` event is emitted when a file is deleted. The event handler receives an `UnlinkFileEvent` object that contains the URL path and absolute path of the deleted file. - -#### Event: `'unlinkDir'` - -- `entry` - [`UnlinkDirectoryEvent`](#interface-unlinkdirectoryevent) - The directory entry that was deleted. - -The `'unlinkDir'` event is emitted when a directory is deleted. The event handler receives an `UnlinkDirectoryEvent` object that contains the URL path and absolute path of the deleted directory. - -#### `entryHandler.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -The name of the plugin. - -#### `entryHandler.directory` - -Returns: `string` - -The directory of the application. This is the root directory of the component where the `config.yaml` file is located. - -#### `entryHandler.close()` - -Returns: `this` - The current `EntryHandler` instance. - -Closes the entry handler, removing all listeners and preventing any further events from being emitted. The handler can be started again using the [`entryHandler.update()`](#entryhandlerupdateconfig) method. - -#### `entryHandler.update(config)` - -Parameters: - -- `config` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) - The configuration object for the entry handler. - -This method will update an existing entry handler to watch new entries. It will close the underlying watcher and create a new one, but will maintain any existing listeners on the EntryHandler instance itself. - -This method returns a promise associated with the ready event of the updated handler. 
- -#### Interface: `BaseEntry` - -- `stats` - [`fs.Stats`](https://nodejs.org/docs/latest/api/fs.html#class-fsstats) | `undefined` - The file system stats for the entry. -- `urlPath` - `string` - The recommended URL path of the entry. -- `absolutePath` - `string` - The absolute path of the entry. - -The foundational entry handle event object. The `stats` may or may not be present depending on the event, entry type, and platform. - -The `urlPath` is resolved based on the configured pattern (`files:` option) combined with the optional `urlPath` option. This path is generally useful for uniquely representing the entry. It is used in the built-in components such as `jsResource` and `static`. - -The `absolutePath` is the file system path for the entry. - -#### Interface: `FileEntry` - -Extends [`BaseEntry`](#interface-baseentry) - -- `contents` - `Buffer` - The contents of the file. - -A specific extension of the `BaseEntry` interface representing a file entry. We automatically read the contents of the file so the user doesn't have to bother with FS operations. - -There is no `DirectoryEntry` since there is no other important metadata aside from the `BaseEntry` properties. If a user wants the contents of a directory, they should adjust the pattern to resolve files instead. - -#### Interface: `EntryEvent` - -Extends [`BaseEntry`](#interface-baseentry) - -- `eventType` - `string` - The type of entry event. -- `entryType` - `string` - The type of entry, either a file or a directory. - -A general interface representing the entry handle event objects. - -#### Interface: `AddFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'add'` -- `entryType` - `'file'` - -Event object emitted when a file is created (or the watcher sees it for the first time). 
- -#### Interface: `ChangeFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'change'` -- `entryType` - `'file'` - -Event object emitted when a file is modified. - -#### Interface: `UnlinkFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'unlink'` -- `entryType` - `'file'` - -Event object emitted when a file is deleted. - -#### Interface: `FileEntryEvent` - -- `AddFileEvent` | `ChangeFileEvent` | `UnlinkFileEvent` - -A union type representing the file entry events. These events are emitted when a file is created, modified, or deleted. The `FileEntry` interface provides the file contents and other metadata. - -#### Interface: `AddDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'addDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is created (or the watcher sees it for the first time). - -#### Interface: `UnlinkDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'unlinkDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is deleted. - -#### Interface: `DirectoryEntryEvent` - -- `AddDirectoryEvent` | `UnlinkDirectoryEvent` - -A union type representing the directory entry events. There are no change events for directories since they are not modified in the same way as files. - -#### Function: `onEntryEventHandler(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` - -Parameters: - -- `entryEvent` - [`FileEntryEvent`](#interface-fileentryevent) | [`DirectoryEntryEvent`](#interface-directoryentryevent) - -Returns: `void` - -This function is what is passed to the `scope.handleEntry()` method as the handler for the `'all'` event. This is also applicable to a custom `.on('all', handler)` method for any `EntryHandler` instance. 
diff --git a/docs/reference/content-types.md b/docs/reference/content-types.md deleted file mode 100644 index b7d223f4..00000000 --- a/docs/reference/content-types.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -Harper supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations. - -:::tip Need a custom content type? - -Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples. - -::: - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and Harper's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/docs/reference/data-types.md b/docs/reference/data-types.md deleted file mode 100644 index df03e718..00000000 --- a/docs/reference/data-types.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper supports all JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allow for all of Harper’s supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and are used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any Unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. 
- -## Number - -Numbers can be stored as signed integers up to 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type names are supported: - -- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double") -- `Int` - Any integer from -2147483648 to 2147483647 -- `Long` - Any integer from -9007199254740992 to 9007199254740992 -- `BigInt` - Any integer (negative or positive) with less than 300 digits - -Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately. - -## Object/Map - -Objects, or maps, that hold a set of named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in Harper’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in Harper property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. 
- -## Binary Data - -Binary data can be stored in property values as well, with two different data types that are available: - -### Bytes - -JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -### Blobs - -Binary data can also be stored with [`Blob`s](blob), which can scale much better for larger content than `Bytes`, as it is designed to be streamed and does not need to be held entirely in memory. It is recommended that `Blob`s are used for content larger than 20KB. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/docs/reference/dynamic-schema.md b/docs/reference/dynamic-schema.md deleted file mode 100644 index 97f5792d..00000000 --- a/docs/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. 
Harper tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -Harper databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -Harper tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in Harper operations API. - -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary key; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. 
- -**Audit Attributes** - -Harper automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. - -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. 
- -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.6/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/docs/reference/globals.md b/docs/reference/globals.md deleted file mode 100644 index 4316d897..00000000 --- a/docs/reference/globals.md +++ /dev/null @@ -1,422 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with Harper is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. 
To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -## `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! 
- -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... -} -``` - -## `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -## `Resource` - -This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](resources/) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. 
See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP, networking, and authentication services. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -### `Request` and `Response` - -The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standard-based APIs to facilitate reuse with modern web code. While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resources/) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). 
However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the Harper server environment: - -#### `Request` - -A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties: - -- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. -- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. -- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. -- `protocol` - This is the protocol of the request, like `http` or `https`. -- `data` - This is the deserialized body of the request (based on the type of data specified by `Content-Type` header). -- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). -- `host` - This is the host of the request, like `example.com`. -- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`. 
This is generally most helpful in a cache resolution function, where you can send hints _if_ the data is not in the cache and is resolving from an origin: - -```javascript -class Origin { - async get(request) { - // if we are fetching data from origin, send early hints - this.getContext().requestContext.sendEarlyHints(''); - let response = await fetch(request); - ... - } -} -Cache.sourcedFrom(Origin); -``` - -- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that sends the cookie in requests will be authenticated as the user that logged in and the session record will be attached to the request. This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. -- `session` - This is the session object that is associated with current cookie-maintained session. This object is used to store session data for the current session. This is `Table` record instance, and can be updated by calling `request.session.update({ key: value })` or session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. -- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depends on the layered `Request` call with `Response` return pattern. 
-- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged and can cause problems for middleware, should only be used if you are certain that other server handlers will not attempt to return a different `Response` object. - -#### `Response` - -REST methods can directly return data that is serialized and returned to users, or it can return a `Response` object (or a promise to a `Response`), or it can return a `Response`-like object with the following properties (or again, a promise to it): - -- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. -- `data` - This is the data to be returned of the response. This will be serialized using Harper's content negotiation. -- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). - -#### `HttpOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. 
- -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. -- The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -- The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the Harper Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through [analytics API](analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. - -### `server.getUser(username): Promise` - -This returns the user object with permissions/authorization information based on the provided username. This does not verify the password, so it is generally used for looking up users by username. If you want to verify a user by password, use [`server.authenticateUser`](globals#serverauthenticateuserusername-password-user). 
- -### `server.authenticateUser(username, password): Promise` - -This returns the user object with permissions/authorization information based on the provided username. The password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -### `server.resources: Resources` - -This provides access to the map of all registered resources. This is the central registry in Harper for registering any resources to be exported for use by REST, MQTT, or other components. Components that want to register resources should use the `server.resources.set(name, resource)` method to add to this map. Exported resources can be found by passing in a path to `server.resources.getMatch(path)` which will find any resource that matches the path or beginning of the path. - -#### `server.resources.set(name, resource, exportTypes?)` - -Register a resource with the server. For example: - -``` -class NewResource extends Resource { -} -server.resources.set('NewResource', Resource); -// or limit usage: -server.resources.set('NewResource', Resource, { rest: true, mqtt: false, 'my-protocol': true }); -``` - -#### `server.resources.getMatch(path, exportType?)` - -Find a resource that matches the path. For example: - -``` -server.resources.getMatch('/NewResource/some-id'); -// or specify the export/protocol type, to allow it to be limited: -server.resources.getMatch('/NewResource/some-id', 'my-protocol'); -``` - -### `server.operation(operation: Object, context?: Object, authorize?: boolean)` - -Execute an operation from the [Operations API](../developers/operations-api) - -Parameters: - -- `operation` - `Object` - Object matching desired operation's request body -- `context` - `Object` - `{ username: string}` - _optional_ - The specified user -- `authorize` - `boolean` - _optional_ - Indicate the operation should authorize the user or not. 
Defaults to `false` - -Returns a `Promise` with the operation's response as per the [Operations API documentation](../developers/operations-api). - -### `server.nodes` - -Returns an array of node objects registered in the cluster - -### `server.shards` - -Returns map of shard number to an array of its associated nodes - -### `server.hostname` - -Returns the hostname of the current node - -### `server.contentTypes` - -Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. - -## `contentTypes` - -Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. - -HarperDB uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: - -1. **Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage -2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format - -### Built-in Content Types - -HarperDB includes handlers for common formats: - -- **JSON** (`application/json`) -- **CBOR** (`application/cbor`) -- **MessagePack** (`application/msgpack`) -- **CSV** (`text/csv`) -- **Event-Stream** (`text/event-stream`) -- And more... - -### Custom Content Type Handlers - -You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). The map is keyed by MIME type, with values being handler objects containing these optional properties: - -#### Handler Properties - -- **`serialize(data: any): Buffer | Uint8Array | string`** - Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. 
- -- **`serializeStream(data: any): ReadableStream`** - Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. - -- **`deserialize(buffer: Buffer | string): any`** - Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. - -- **`deserializeStream(stream: ReadableStream): any`** - Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). - -- **`q: number`** _(default: 1)_ - Quality indicator between 0 and 1 representing serialization fidelity. Used in content negotiation to select the best format when multiple options are available. The server chooses the content type with the highest product of client quality × server quality values. - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` diff --git a/docs/reference/graphql.md b/docs/reference/graphql.md deleted file mode 100644 index cc43eec9..00000000 --- a/docs/reference/graphql.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: GraphQL Querying ---- - -# GraphQL Querying - -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resources/). - -Get started by setting `graphql: true` in `config.yaml`. - -This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. 
- -> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it does not fully implement either that specification or the [GraphQL](https://spec.graphql.org/) specification. - -Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. - -For example, to request the GraphQL Query: - -```graphql -query GetDogs { - Dog { - id - name - } -} -``` - -The `GET` request would look like: - -```http -GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D -Accept: application/graphql-response+json -``` - -And the `POST` request would look like: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDogs { Dog { id name } }" -} -``` - -> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. - -The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@exported` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resources/#query) for more complex queries. - -Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: - -```graphql -query GetDogsAndOwners { - Dog { - id - name - breed - } - - Owner { - id - name - occupation - } -} -``` - -This will return all dogs and owners in the database. 
And is equivalent to executing two REST queries: - -```http -GET /Dog/?select(id,name,breed) -# and -GET /Owner/?select(id,name,occupation) -``` - -### Request Parameters - -There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` - -1. `query` - _Required_ - The string representation of the GraphQL document. - 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. - 1. i.e. GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). - 1. If a shorthand, unnamed, or singular named query is provided, they will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. -1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter -1. `variables` - _Optional_ - A map of variable values to be used for the specified query - -### Type Checking - -The Harper GraphQL Querying system takes many liberties from the GraphQL specification. This extends to how it handles type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. - -In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. - -For example, the variable `$name: String!` states that `name` should be a non-null, string value. 
- -- If the request does not contain the `name` variable, an error will be returned -- If the request provides `null` for the `name` variable, an error will be returned -- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. -- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. - - If `null` is provided as the variable value, an error will still be returned. - - If the default value does not match the type specified (i.e. `$name: String! = 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. -- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. - -The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. - -- Objects will be transformed into properly nested attributes -- Strings and Boolean values are passed through as their AST values -- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. -- List and Enums are not supported. - -### Fragments - -The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. 
- -For example, in the query - -```graphql -query Get { - Dog { - ...DogFields - } -} - -fragment DogFields on Dog { - name - breed -} -``` - -The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). - -You can literally specify anything in the fragment and it will behave the same way: - -```graphql -fragment DogFields on Any { ... } # this is recommended -fragment DogFields on Cat { ... } -fragment DogFields on Animal { ... } -fragment DogFields on LiterallyAnything { ... } -``` - -As an actual example, fragments should be used for composition: - -```graphql -query Get { - Dog { - ...sharedFields - breed - } - Owner { - ...sharedFields - occupation - } -} - -fragment sharedFields on Any { - id - name -} -``` - -### Short Form Querying - -Any attribute can be used as an argument for a query. In this short form, multiple arguments are treated as multiple equivalency conditions with the default `and` operation. - -For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. - -```graphql -query GetDog($id: ID!) { - Dog(id: $id) { - name - breed - owner { - name - } - } -} -``` - -And as a properly formed request: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}}", - "variables": { - "id": "0" - } -} -``` - -The REST equivalent would be: - -```http -GET /Dog/?id==0&select(name,breed,owner{name}) -# or -GET /Dog/0?select(name,breed,owner{name}) -``` - -Short form queries can handle nested attributes as well. 
- -For example, return all dogs who have an owner with the name `"John"` - -```graphql -query GetDog { - Dog(owner: { name: "John" }) { - name - breed - owner { - name - } - } -} -``` - -Would be equivalent to - -```http -GET /Dog/?owner.name==John&select(name,breed,owner{name}) -``` - -And finally, we can put all of these together to create semi-complex, equality based queries! - -The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. - -```graphql -query GetDog($dogName: String!, $ownerName: String!) { - Dog(name: $dogName, owner: { name: $ownerName }) { - name - breed - owner { - name - } - } -} -``` - -### Long Form Querying - -> Coming soon! - -### Mutations - -> Coming soon! - -### Subscriptions - -> Coming soon! - -### Directives - -> Coming soon! diff --git a/docs/reference/headers.md b/docs/reference/headers.md deleted file mode 100644 index 5c85fc88..00000000 --- a/docs/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Harper Headers ---- - -# Harper Headers - -All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. 
| diff --git a/docs/reference/index.md b/docs/reference/index.md deleted file mode 100644 index 4c5d867a..00000000 --- a/docs/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for Harper. - -Please choose a topic from the navigation menu on the left. diff --git a/docs/reference/limits.md b/docs/reference/limits.md deleted file mode 100644 index 97214620..00000000 --- a/docs/reference/limits.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Harper Limits ---- - -# Harper Limits - -This document outlines limitations of Harper. - -## Database Naming Restrictions - -**Case Sensitivity** - -Harper database metadata (database names, table names, and attribute/column names) are case sensitive. Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -Harper limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. - -## Primary Keys - -The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shortest). 
diff --git a/docs/reference/resources/index.md b/docs/reference/resources/index.md deleted file mode 100644 index 82269149..00000000 --- a/docs/reference/resources/index.md +++ /dev/null @@ -1,796 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information during execution. Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. 
This general rule for how to interact with resources: - -- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initially access or write data. For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. -- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. - -The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. - -The REST-based API is a little different from traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind. Semantics that attempt to guarantee no existing record or overwrite-only behavior require locks that don't scale well in distributed database. Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed eventually consistent database. You can generally think of CRUD operations mapping to REST operations like this: - -- Read - `get` -- Create with a known primary key - `put` -- Create with a generated primary key - `post`/`create` -- Update (Full) - `put` -- Update (Partial) - `patch` -- Delete - `delete` - -The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. 
Using a path that specifies an ID like `/MyResource/3492` will be mapped to an instance of MyResource, and will call the instance methods like `get(target)`, `put(target, data)`, and `post(target, data)`, where target is based on the `/3492` part of the path. - -It is recommended that you use the latest version (V2) of the Resource API with the legacy instance binding behavior disabled. This is done by setting the static `loadAsInstance` property to `false` on the Resource class. This will become the default behavior in Harper version 5.0. This page is written assuming `loadAsInstance` is set to `false`. If you want to use the legacy instance binding behavior, you can set `loadAsInstance` to `true` on the Resource class. If you have existing code that you want to migrate, please see the [migration guide](resources/migration) for more information. - -You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the Harper JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. 
For example: - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = false; // enable the updated API - async get(target) { - // fetch data from an external source, using our id - let response = await this.fetch(target.id); - // do something with the response - } - put(target, data) { - // send the data into the external source - } - delete(target) { - // delete an entity in the external data source - } - subscribe(subscription) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper table to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; // enable the updated API - get(target) { - // we can add properties or change properties before returning data: - return { ...super.get(target), newProperty: 'newValue', existingProperty: 42 }; // returns the record, with additional properties - } - put(target, data) { - // can change data any way we want - super.put(target, data); - } - delete(target) { - super.delete(target); - } - post(target, data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -#### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! - -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... 
-} -``` - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -#### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](./components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the [transactions documentation](./transactions) for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(target: RequestTarget | Id): Promise|AsyncIterable` - -This retrieves a record, or queries for records, and is called by HTTP GET requests. This can be called with a `RequestTarget` which can specify a path/id and query parameters as well as search parameters. For tables, this can also be called directly with an id (string or number) to retrieve a record by id. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. HTTP requests will always call `get` with a full `RequestTarget`. The default `get` method (`super.get(target)`) returns the current record as a plain object. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -class extends Resource { - static loadAsInstance = false; - get(target) { - let param1 = target.get('param1'); // returns 'value' - let id = target.id; // returns 'some-id' - let path = target.pathname; // returns /some-id - let fullTarget = target.target; // returns /some-id?param1=value - ... 
- } -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return the record identified by the path. If `get` is called on a collection (`/Table/?name=value`), the target will have the `isCollection` property set to `true` and default action is to `search` and return an AsyncIterable of results. - -### `search(query: RequestTarget)`: AsyncIterable - -This performs a query on this resource or table. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an `AsyncIterable` of results. The `query` object can be used to specify the desired query. - -### `put(target: RequestTarget | Id, data: object): void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(target, data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `patch(target: RequestTarget | Id, data: object): void|Response` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(target, data)`) updates the record. 
The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `update(target: RequestTarget, updates?: object): Updatable` - -This can be called to get an Updatable class for updating a record. An `Updatable` instance provides direct access to record properties as properties on `Updatable` instance. The properties can also be modified and any changes are tracked and written to the record when the transaction commits. For example, if we wanted to update the quantify of a product in the Product table, in response to a post, we could write: - -```javascript -class ... { - post(target, data) { - static loadAsInstance = false; - let updatable = this.update(target); - updatable.quantity = updatable.quantity - 1; - } -} -``` - -In addition, the `Updatable` class has the following methods. - -### `Updatable` class - -#### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. We could improve the example above to reliably ensure the quantity is decremented even when it occurs in multiple nodes simultaneously: - -```javascript -class ... { - static loadAsInstance = false; - post(target, data) { - let updatable = this.update(target); - updatable.addTo('quantity', -1); - } -} -``` - -#### `subtractFrom(property, value)` - -This functions exactly the same as `addTo`, except it subtracts the value. 
- -The `Updatable` also inherits the `getUpdatedTime` and `getExpiresAt` methods from the `RecordObject` class. - -### `delete(target: RequestTarget): void|Response` - -This will delete this record or resource identified by the target, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete(target)`) deletes the record identified by target from the table as part of the current transaction. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `publish(target: RequestTarget, message): void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(target, message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `post(target: RequestTarget, data: object): void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `invalidate(target: RequestTarget)` - -This method is available on tables. 
This will invalidate the specified record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.publish(message)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. 
-- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. 
This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used fulfilled many different requests, and relying on this first request context may not be representative of future requests. 
Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. 
Whereas instance methods are bound
- -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(target: RequestTarget|Id, data: object, context?: Resource|Context): Promise|any` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(target: RequestTarget|Id, recordUpdate: object, context?: Resource|Context): Promise|void` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(target: RequestTarget|Id, context?: Resource|Context): Promise|void` - -Deletes this resource's record or data. 
Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(target: RequestTarget|Id, message: object, context?: Resource|Context): Promise|void` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: RequestTarget, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). 
- -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. 
By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include `estimatedRange` array with estimate range of the count. - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to multipart an array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). 
The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. 
This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). For example, a complex query might look like: - -For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. 
An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`. - -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify an `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. 
For example, a `select` can defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', `{ name: 'related', select: ['description', 'id'] }` ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. For example to just return an iterator of the `id`s of object: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition applied first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. 
In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### `RequestTarget` - -The `RequestTarget` class is used to represent a URL path that can be mapped to a resource. This is used by the REST interface to map a URL path to a resource class. All REST methods are called with a `RequestTarget` as the first argument, which is used to determine which record or entry to access or modify. Methods on a `Resource` class can be called with a primary key as a string or number value as the first argument, to access or modify a record by primary key, which will work with all the default methods. The static methods will transform the primary key to a `RequestTarget` instance to call the instance methods for argument normalization. -When a `RequestTarget` is constructed with a URL path (from the REST methods), the static methods will also automatically parse the path to a `RequestTarget` instance, including parsing the search string into query parameters.
-Below are the properties and methods of the `RequestTarget` class: - -- `pathname` - The path of the URL relative to the resource path that matched this request. This excludes the query/search string -- `toString()` - The full relative path and search string of the URL -- `search` - The search/query part of the target path (the part after the first `?` character) -- `id` - The primary key of the resource, as determined by the path -- `checkPermission` - This property is set to an object indicating that a permission check should be performed on the - resource. This is used by the REST interface to determine if a user has permission to access the resource. The object - contains: - - `action` - The type of action being performed (read/write/delete) - - `resource` - The resource being accessed - - `user` - The user requesting access - -`RequestTarget` is a subclass of `URLSearchParams`, and these methods are available for accessing and modifying the query parameters: - -- `get(name: string)` - Get the value of the query parameter with the specified name -- `getAll(name: string)` - Get all the values of the query parameter with the specified name -- `set(name: string, value: string)` - Set the value of the query parameter with the specified name -- `append(name: string, value: string)` - Append the value to the query parameter with the specified name -- `delete(name: string)` - Delete the query parameter with the specified name -- `has(name: string)` - Check if the query parameter with the specified name exists - -In addition, the `RequestTarget` class is an iterable, so you can iterate through the query parameters: - -- `for (let [name, value] of target)` - Iterate through the query parameters - -When a `RequestTarget` has query parameters using Harper's extended query syntax, the REST static methods will parse the `RequestTarget` and potentially add any of the following properties if they are present in the query: - -- `conditions` - An array of conditions that will be
used to filter the query results -- `limit` - The limit of the number of records to return -- `offset` - The number of records to skip before returning the results -- `sort` - The sort order of the query results -- `select` - The properties to return in the query results - -### `RecordObject` - -The `get` method will return a `RecordObject` instance, which is an object containing all the properties of the record. Any property on the record can be directly accessed and the properties can be enumerated with standard JS capabilities like `for`-`in` and `Object.keys`. The `RecordObject` instance will also have the following methods: - -- `getUpdatedTime()` - Get the last updated time (the version number) of the record -- `getExpiresAt()` - Get the expiration time of the entry, if there is one. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, you can interact through standard CRUD/REST methods to create, read, update, and delete records. You can use idiomatic property access and modification to interact with the records themselves.
For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our own `get()` we can interact with the record: - -```javascript -export class CustomProduct extends Product { - async get(target) { - let record = await super.get(target); - let name = record.name; // this is the name of the current product - let rating = record.rating; // this is the rating of the current product - // we can't directly modify the record (it is frozen), but we can copy if we want to return a modification - record = { ...record, rating: 3 }; - return record; - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -// if we want to update a single property: -await Product.patch(1, { rating: 3 }); -``` - -When running inside a transaction, we can use the `update` method and updates are automatically saved when a request completes: - -```javascript -export class CustomProduct extends Product { - post(target, data) { - let record = this.update(target); - record.name = data.name; - record.description = data.description; - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand - variations: [Variation] -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(target, data) { - let record = this.update(target); - let brandName = record.brand.name; - let firstVariationPrice = record.variations[0].price; - let additionalInfoOnBrand = record.brand.additionalInfo; // not defined in schema, but can still try to access property - // make some changes - record.variations.splice(0, 1); // remove first variation - record.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - record.brand.name = 'new brand name'; - // all these changes will be saved - } -} -``` - -If you need to delete a property, you can do so with the `delete` method: - -```javascript -let product1 = await Product.update(1); -product1.delete('additionalInformation'); -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation).
For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/docs/reference/resources/instance-binding.md b/docs/reference/resources/instance-binding.md deleted file mode 100644 index 5c507e32..00000000 --- a/docs/reference/resources/instance-binding.md +++ /dev/null @@ -1,721 +0,0 @@ ---- -title: Resource Class with Resource Instance Binding behavior ---- - -# Resource Class with Resource Instance Binding behavior - -This document describes the legacy instance binding behavior of the Resource class. It is recommended that you use the [updated behavior of the Resource API](./) instead, but this legacy API is preserved for backwards compatibility.
- -## Resource Class - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = true; - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper table to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice.
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the [transactions documentation](../transactions) for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. 
- -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. 
If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make sure you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query): Resource|void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance.
- -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.put(data)` updates this specific record/resource, not other records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object): Resource|void|Response` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?): Resource|void|Response` - -This will delete this record or resource, and is called for HTTP DELETE requests.
You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete()`) deletes the record from the table as part of the current transaction. - -### `publish(message): Resource|void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query): Resource|void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through.
It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. 
- -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. 
The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for the update. - -### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations.
-- `transaction` - The current transaction. If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`).
- -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently.
Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instance methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods create an instance bound to the record specified by the arguments, and call the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query.
The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). 
However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. 
The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. 
- -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include `estimatedRange` array with estimate range of the count. - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to multipart an array id). However, in some situations you may wish to preserve the path directly as a string. 
You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. 
We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](../transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). 
For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../../developers/applications/defining-schemas) (in addition to the [schema documentation](../../developers/applications/defining-schemas), see the [REST documentation](../../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`.
- -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify a `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results.
For example to just return an iterator of the `id`s of object: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attributes`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition applied first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. 
Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can direct access or modify properties through standard property syntax. For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property 
access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the \`update()\`\` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these change will be saved - } -} -``` - -If you need to delete a property, you can do with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it is belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. 
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/docs/reference/resources/migration.md b/docs/reference/resources/migration.md deleted file mode 100644 index 51ec4c83..00000000 --- a/docs/reference/resources/migration.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: Migration to Resource API version 2 (non-instance binding) ---- - -# Migration to Resource API version 2 (non-instance binding) - -The Resource API was inspired by two major design ideas: the REST architectural design and the [Active Record pattern](https://en.wikipedia.org/wiki/Active_record_pattern) (made popular by Ruby on Rails and heavily used as a pattern in many ORMs). The basic design goal of the Resource API is to integrate these concepts into a single construct that can directly map RESTful methods (specifically the "uniform interface" of HTTP) to an active record data model.
However, while the active record pattern has been for _consumption_ of data, implementing methods for endpoint definitions and caching sources as a data _provider_ can be confusing and cumbersome to implement. The updated non-instance binding Resource API is designed to make it easier and more consistent to implement a data provider and interact with records across a table, while maintaining more explicit control over what data is loaded and when. - -The updated Resource API is enabled on a per-class basis by setting static `loadAsInstance` property to `false`. When this property is set to `false`, this means that the Resource instances will not be bound to a specific record. Instead instances represent the whole table, capturing the context and current transactional state. Any records in the table can be loaded or modified from `this` instance. There are a number of implications and different behaviors from a Resource class with `static loadAsInstance = false`: - -- The `get` method (both static and instance) will directly return the record, a frozen enumerable object with direct properties, instead of a Resource instance. -- When instance methods are called, there will not be any record preloaded beforehand and the resource instance will not have properties mapped to a record. -- All instance methods accept a `target`, an instance of `RequestTarget`, as the first argument, which identifies the target record or query. - - The `target` will have an `id` property identifying the target resource, along with target information. - - The `getId()` method is no longer used and will return `undefined`. - - The `target` will provide access to query parameters, search operators, and other directives. - - A `target` property of `checkPermission` indicates that a method should check the permission before of request before proceeding. The default instance methods provide the default authorization behavior. 
- - This supplants the need for `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete` methods, which shouldn't need to be used (and don't provide the id of the target record). -- Any data from a POST, PUT, and PATCH request will be available in the second argument. This reverses the order of the arguments to `put`, `post`, and `patch` compared to the legacy Resource API. -- Context is tracked using asynchronous context tracking, and will automatically be available to calls to other resources. This can be disabled by setting `static explicitContext = true`, which can improve performance. -- The `update` method will return an `Updatable` object (instead of a Resource instance), which provides properties mapped to a record, but these properties can be updated and changes will be saved when the transaction is committed. - -The following are examples of how to migrate to the non-instance binding Resource API. - -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - async get(query) { - let id = this.getId(); // get the id - if (query?.size > 0) { - // check number of query parameters - let idWithQuery = id + query.toString(); // add query parameters - let resource = await tables.MyData.get(idWithQuery, this); // retrieve another record - resource.newProperty = 'value'; // assign a new value to the returned resource instance - return resource; - } else { - this.newProperty = 'value'; // assign a new value to this instance - return super.get(query); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - let id = target.id; // get the id - let record; - if (target.size > 0) { - // check number of query parameters - let idWithQuery = target.toString(); // this is the full target with the path query parameters - // we can retrieve another record from this table directly with this.get/super.get or 
with tables.MyData.get - record = await super.get(idWithQuery); - } else { - record = await super.get(target); // we can just directly use the target as well - } - // the record itself is frozen, but we can copy/assign to a new object with additional properties if we want - return { ...record, newProperty: 'value' }; - } -} -``` - -Here is an example of the preferred approach for authorization: -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - allowRead(user) { - // allow any authenticated user - return user ? true : false; - } - async get(query) { - // any get logic - return super.get(query); - } -} -``` - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - // While you can still use allowRead, it is not called before get is called, and it is generally encouraged - // to perform/call authorization explicitly in direct get, put, post methods rather than using allow* methods. 
- if (!this.getContext().user) throw new Error('Unauthorized'); - target.checkPermissions = false; // authorization complete, no need to further check permissions below - // target.checkPermissions is set to true or left in place, this default get method will perform the default permissions checks - return super.get(target); // we can just directly use the query as well - } -} -``` - -Here is an example of how to convert/upgrade an implementation of a `post` method: -Previous code with a `post` method: - -```javascript -export class MyData extends tables.MyData { - async post(data, query) { - let resource = await tables.MyData.get(data.id, this); - if (resource) { - // update a property - resource.someProperty = 'value'; - // or - tables.MyData.patch(data.id, { someProperty: 'value' }, this); - } else { - // create a new record - MyData.create(data, this); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - // IMPORTANT: arguments are reversed: - async post(target, data) { - let record = await this.get(data.id); - if (record) { - // update a property - const updatable = await this.update(data.id); // we can alternately pass a target to update - updatable.someProperty = 'value'; - // or - this.patch(data.id, { someProperty: 'value' }); - } else { - // create a new record - this.create(data); - } - } -} -``` diff --git a/docs/reference/resources/query-optimization.md b/docs/reference/resources/query-optimization.md deleted file mode 100644 index 139b862b..00000000 --- a/docs/reference/resources/query-optimization.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Query Optimization ---- - -## Query Optimization - -Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. 
It is important to understand how querying works to help you optimize your queries for the best performance. - -### Query Execution - -At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database and delivering the results based on required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. When a field is not indexed and a query specifies a condition on that field, the database checks each potential record to determine if it matches the condition. - -When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default, an `and` operator, matching records must match all conditions), Harper will attempt to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data and then filter based on the remaining conditions. If a condition can search an indexed field, with a selective condition, it will be used before conditions that aren't indexed, or aren't as selective. The `search` method includes an `explain` flag that can be used to return a query execution order to understand how the query is being executed. This can be useful for debugging and optimizing queries. - -For a union query, each condition is executed separately and the results are combined/merged. - -### Conditions, Operators, and Indexing - -When a query is performed, the conditions specified in the query are evaluated against the data in the database.
The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The use of these operators can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions with an indexed field, the database will have to check every record with a full table scan, which can be very slow for large datasets (it will get slower as the dataset grows, `O(n)`). - -The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations. This is because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries. - -The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted list of values, so the greater than and less than operators will also utilize indexed fields when possible. If the range is narrow, these operations can be very fast. A wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it can quickly find the correct matching entries in the sorted index. 
On the other hand, the `contains`, `ends_with`, and not-equal (`!=` or `not_equal`) operators cannot leverage the indexes, so they will require a full table scan to find the matching records if they are not used in conjunction with a selective/indexed condition. There is a special case of `!= null` which can use indexes to find non-null records. However, this is generally only helpful for sparse fields where a small subset are non-null values. More generally, operators are more efficient if they are selecting on fields with a high cardinality. - -Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes). In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database, and doesn't require cross-referencing to the main records. - -### Relationships/Joins - -Harper supports relationships between tables, allowing for "join" queries. This does result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions which use relationships. Indexed fields and comparators that leverage the ordering are still valuable for performance. It is also important that if a condition on a table is connected to another table's foreign key, that that foreign key also be indexed. Likewise, if a query `select`s data from a related table that uses a foreign key to relate, that foreign key should be indexed. The same principle of higher cardinality applies here as well: more unique values allow for efficient lookups. - -### Sorting - -Queries can also specify a sort order. This can also significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. 
A sort order used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as they do to conditions. Sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order. - -### Streaming - -One of the unique and powerful features of Harper's querying functionality is the ability to stream query results. When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending the initial data before the entire query is complete (improving time-to-first-byte speed, for example). However, using a sort order on a query with conditions that are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits. diff --git a/docs/reference/roles.md b/docs/reference/roles.md deleted file mode 100644 index 2e3dc570..00000000 --- a/docs/reference/roles.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Roles ---- - -# Roles - -Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary. - -## Configuring Roles - -Point to a roles configuration file from your application’s `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -You can declare one or more files. 
Each file should define one or more roles in YAML format. - -## Roles File Structure - -A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain: - -- **super_user** – a boolean that grants all permissions. -- **databases** – one or more databases the role has access to. -- **tables** – within each database, table-level and attribute-level permissions. - -**Full Example** - -```yaml -: - super_user: # optional - : - : - read: - insert: - update: - delete: - attributes: - : - read: - insert: - update: -``` - -## Role Flags - -- `super_user: true` — grants full system access. -- `super_user: false` — the role only has the explicit permissions defined in the role. - -## Database and Table Permissions - -Within each role, you may specify one or more databases. Each database can declare permissions for tables. - -Example: - -```yaml -analyst: - super_user: false - data: - Sales: - read: true - insert: false - update: false - delete: false -``` - -In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database. - -## Attribute-Level Permissions - -You can also grant or deny access at the attribute level within a table. - -Example: - -```yaml -editor: - data: - Articles: - read: true - insert: true - update: true - attributes: - title: - read: true - update: true - author: - read: true - update: false -``` - -Here, the `editor` role can update the `title` of an article but cannot update the `author`. - -## Multiple Roles - -Roles can be defined side by side in a single file: - -```yaml -reader: - super_user: false - data: - Dog: - read: true - -writer: - super_user: false - data: - Dog: - insert: true - update: true -``` - -## Behavior on Startup - -- If a declared role does not exist, Harper creates it. -- If a declared role already exists, Harper updates its permissions to match the definition. 
-- Roles are enforced consistently across deployments, keeping access control in sync with your application code. diff --git a/docs/reference/sql-guide/date-functions.md b/docs/reference/sql-guide/date-functions.md deleted file mode 100644 index c9747dcd..00000000 --- a/docs/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. 
- -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. 
- -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/docs/reference/sql-guide/features-matrix.md b/docs/reference/sql-guide/features-matrix.md deleted file mode 100644 index 7766faa4..00000000 --- a/docs/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## SQL Features Matrix - -Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. 
- -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/docs/reference/sql-guide/functions.md b/docs/reference/sql-guide/functions.md deleted file mode 100644 index 789090a4..00000000 --- a/docs/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Harper SQL Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. 
Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Functions - -This SQL keywords reference contains the SQL functions available in Harper. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. | -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). 
- -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. | -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. 
If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. | -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. 
| -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. | - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. 
| - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. 
| - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. | -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. 
| - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. | -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. 
| - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. | diff --git a/docs/reference/sql-guide/index.md b/docs/reference/sql-guide/index.md deleted file mode 100644 index 52f245ab..00000000 --- a/docs/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## Harper SQL Guide - -The purpose of this guide is to describe the available functionality of Harper as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. 
Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -Harper adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. - -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -Harper supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -Harper allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/docs/reference/sql-guide/json-search.md b/docs/reference/sql-guide/json-search.md deleted file mode 100644 index 3e6b6326..00000000 --- a/docs/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -Harper automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. 
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/docs/reference/sql-guide/reserved-word.md b/docs/reference/sql-guide/reserved-word.md deleted file mode 100644 index 2cd812ba..00000000 --- a/docs/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: Harper SQL Reserved Words ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/docs/reference/sql-guide/sql-geospatial-functions.md b/docs/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index f0c571da..00000000 --- a/docs/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: https://geojson.org/. There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -1. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -1. Note if you are using Postman for you testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. | - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. 
- -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. - -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. 
Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. - -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. 
- -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. - -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. 
| - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain Harper Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see https://developers.arcgis.com/documentation/spatial-references/. Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find Harper Headquarters within all locations within the database. 
- -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. 
Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "Harper Headquarters" - }' -) -``` diff --git a/docs/reference/storage-algorithm.md b/docs/reference/storage-algorithm.md deleted file mode 100644 index 03c4c014..00000000 --- a/docs/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The Harper storage algorithm is fundamental to the Harper core functionality, enabling the [Dynamic Schema](dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within Harper. - -## Query Language Agnostic - -The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. Each Harper table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. Harper tables can have multiple reader processes operating at the same time for consistent, high scale reads. 
- -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](dynamic-schema) reflexively creates both the attribute and index reflexively as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## Harper Indexing Example (Single Table) - -![](/img/v4.6/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/docs/reference/transactions.md b/docs/reference/transactions.md deleted file mode 100644 index 7e8546fb..00000000 --- a/docs/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in Harper. 
For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. 
- -Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). - -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. 
The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.1/add-ons-and-sdks/google-data-studio.md b/versioned_docs/version-4.1/add-ons-and-sdks/google-data-studio.md deleted file mode 100644 index 48ebaca1..00000000 --- a/versioned_docs/version-4.1/add-ons-and-sdks/google-data-studio.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Google Data Studio ---- - -# Google Data Studio - -[Google Data Studio](https://datastudio.google.com/) is a free collaborative visualization tool which enables users to build configurable charts and tables quickly. The HarperDB Google Data Studio connector seamlessly integrates your HarperDB data with Google Data Studio so you can build custom, real-time data visualizations. - -The HarperDB Google Data Studio Connector is subject to our [Terms of Use](https://harperdb.io/legal/harperdb-cloud-terms-of-service/) and [Privacy Policy](https://harperdb.io/legal/privacy-policy/). - -## Requirements - -The HarperDB database must be accessible through the Internet in order for Google Data Studio servers to access it. The database may be hosted by you or via HarperDB Cloud. - -## Get Started - -Get started by selecting the HarperDB connector from the [Google Data Studio Partner Connector Gallery](https://datastudio.google.com/u/0/datasources/create). - -1. Log in to [https://datastudio.google.com/](https://datastudio.google.com/). -1. Add a new Data Source using the HarperDB connector. 
The current release version can be added as a data source by following this link: [HarperDB Google Data Studio Connector](https://datastudio.google.com/datasources/create?connectorId=AKfycbxBKgF8FI5R42WVxO-QCOq7dmUys0HJrUJMkBQRoGnCasY60_VJeO3BhHJPvdd20-S76g). -1. Authorize the connector to access other servers on your behalf (this allows the connector to contact your database). -1. Enter the Web URL to access your database (preferably with HTTPS), as well as the Basic Auth key you use to access the database. Just include the key, not the word "Basic" at the start of it. -1. Check the box for "Secure Connections Only" if you want to always use HTTPS connections for this data source; entering a Web URL that starts with https:// will do the same thing, if you prefer. -1. Check the box for "Allow Bad Certs" if your HarperDB instance does not have a valid SSL certificate. HarperDB Cloud always has valid certificates, and so will never require this to be checked. Instances you set up yourself may require this, if you are using self-signed certs. If you are using HarperDB Cloud or another instance you know should always have valid SSL certificates, do not check this box. -1. Choose your Query Type. This determines what information the configuration will ask for after pressing the Next button. - - Table will ask you for a Schema and a Table to return all fields of using `SELECT *`. - - SQL will ask you for the SQL query you’re using to retrieve fields from the database. You may `JOIN` multiple tables together, and use HarperDB specific SQL functions, along with the usual power SQL grants. -1. When all information is entered correctly, press the Connect button in the top right of the new Data Source view to generate the Schema. You may also want to name the data source at this point. If the connector encounters any errors, a dialog box will tell you what went wrong so you can correct the issue. -1. 
If there are no errors, you now have a data source you can use in your reports! You may change the types of the generated fields in the Schema view if you need to (for instance, changing a Number field to a specific currency), as well as creating new fields from the report view that do calculations on other fields. - -## Considerations - -- Both Postman and the [HarperDB Studio](../harperdb-studio/) app have ways to convert a user:password pair to a Basic Auth token. Use either to create the token for the connector’s user. - - You may sign out of your current user by going to the instances tab in HarperDB Studio, then clicking on the lock icon at the top-right of a given instance’s box. Click the lock again to sign in as any user. The Basic Auth token will be visible in the Authorization header portion of any code created in the Sample Code tab. -- It’s highly recommended that you create a read-only user role in HarperDB Studio, and create a user with that role for your data sources to use. This prevents that authorization token from being used to alter your database, should someone else ever get ahold of it. -- The RecordCount field is intended for use as a metric, for counting how many instances of a given set of values appear in a report’s data set. -- _Do not attempt to create fields with spaces in their names_ for any data sources! Google Data Studio will crash when attempting to retrieve a field with such a name, producing a System Error instead of a useful chart on your reports. Using CamelCase or snake_case gets around this. 
diff --git a/versioned_docs/version-4.1/add-ons-and-sdks/index.md b/versioned_docs/version-4.1/add-ons-and-sdks/index.md deleted file mode 100644 index cb6b6a21..00000000 --- a/versioned_docs/version-4.1/add-ons-and-sdks/index.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: Add-ons & SDKs ---- - -# Add-ons & SDKs - -All HarperDB Add-Ons and SDKs can be found in the [HarperDB Marketplace](../4.1/harperdb-studio/resources) located in the [HarperDB Studio](../4.1/harperdb-studio/resources). diff --git a/versioned_docs/version-4.1/audit-logging.md b/versioned_docs/version-4.1/audit-logging.md deleted file mode 100644 index 010215e2..00000000 --- a/versioned_docs/version-4.1/audit-logging.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Audit Logging ---- - -## Audit log - -The audit log uses a standard HarperDB table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is disabled by default. To use the audit log, set `logging.auditLog` to true in the config file, `harperdb-config.yaml`. Then restart HarperDB for those changes to take place. - -## Audit Log Operations - -### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [HarperDB API documentation](https://api.harperdb.io/). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. 
- -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -### read_audit_log Response - -The example that follows provides records of operations performed on a table. One thing of note is that this the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. 
- -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.1/clustering/certificate-management.md b/versioned_docs/version-4.1/clustering/certificate-management.md deleted file mode 100644 index b69c4786..00000000 --- a/versioned_docs/version-4.1/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box HarperDB generates certificates that are used when HarperDB nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the HarperDB node, the following settings (see the full [configuration file](../configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that HarperDB generates are stored in your `/keys/`. - -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your HarperDB cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make HarperDB start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart HarperDB. 
- -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other HarperDB nodes and to make requests to other HarperDB nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart HarperDB. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. 
Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.1/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.1/clustering/creating-a-cluster-user.md deleted file mode 100644 index 7865ae1c..00000000 --- a/versioned_docs/version-4.1/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via HarperDB users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, HarperDB must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.1/clustering/enabling-clustering.md b/versioned_docs/version-4.1/clustering/enabling-clustering.md deleted file mode 100644 index 596665d9..00000000 --- a/versioned_docs/version-4.1/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. Using **environment variables**. 
- -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install HarperDB**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.1/clustering/establishing-routes.md b/versioned_docs/version-4.1/clustering/establishing-routes.md deleted file mode 100644 index 868a1fed..00000000 --- a/versioned_docs/version-4.1/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the HarperDB configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.1/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. 
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.1/clustering/index.md b/versioned_docs/version-4.1/clustering/index.md deleted file mode 100644 index 6cbb2641..00000000 --- a/versioned_docs/version-4.1/clustering/index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -HarperDB clustering is the process of connecting multiple HarperDB databases together to create a database mesh network that enables users to define data replication patterns. - -HarperDB’s clustering engine replicates data between instances of HarperDB using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manor. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. - -- There may not be a reliable network connection. - -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. - -- The edge node should be inaccessible from outside the firewall. - -- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. 
- -HarperDB simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. - -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. - -- When a threshold violation occurs, the application adds a record to the "alerts" table. - -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. - -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting HarperDB focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to HarperDB, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.1/clustering/managing-subscriptions.md b/versioned_docs/version-4.1/clustering/managing-subscriptions.md deleted file mode 100644 index bee50508..00000000 --- a/versioned_docs/version-4.1/clustering/managing-subscriptions.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: Managing subscriptions ---- - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The schema and tables in the subscription must exist on either the local or the remote node. Any schema and tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `add_node`. 
- -```json -{ - "operation": "add_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "schema": "dev", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node use `update_node`. - -```json -{ - "operation": "update_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. - -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "schema": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "schema": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. 
- -```json -{ - "operation": "add_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. - -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.1/clustering/naming-a-node.md b/versioned_docs/version-4.1/clustering/naming-a-node.md deleted file mode 100644 index 308aef7a..00000000 --- a/versioned_docs/version-4.1/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. 
- -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.1/clustering/requirements-and-definitions.md b/versioned_docs/version-4.1/clustering/requirements-and-definitions.md deleted file mode 100644 index 1e2dd6af..00000000 --- a/versioned_docs/version-4.1/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of HarperDB running. - -\*_A node is a single instance/installation of HarperDB. A node of HarperDB can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a HarperDB cluster. 
diff --git a/versioned_docs/version-4.1/clustering/subscription-overview.md b/versioned_docs/version-4.1/clustering/subscription-overview.md deleted file mode 100644 index 1e9ea5d1..00000000 --- a/versioned_docs/version-4.1/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscriptions ---- - -# Subscriptions - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching schema name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`schema` - the name of the schema that the table you are creating the subscription for belongs to. - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.1/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.1/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. 
- -#### Subscribe and Publish - -![figure 4](/img/v4.1/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.1/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. diff --git a/versioned_docs/version-4.1/clustering/things-worth-knowing.md b/versioned_docs/version-4.1/clustering/things-worth-knowing.md deleted file mode 100644 index e4f5bf18..00000000 --- a/versioned_docs/version-4.1/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things worth Knowing ---- - -# Things worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any schemas and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive schema operations do not replicate across a cluster**. Those operations include `drop_schema`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop schema information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -HarperDB has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. 
- ---- - -### Topologies - -HarperDB clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. - -![figure 6](/img/v4.1/clustering/figure6.png) diff --git a/versioned_docs/version-4.1/configuration.md b/versioned_docs/version-4.1/configuration.md deleted file mode 100644 index 3e40e32b..00000000 --- a/versioned_docs/version-4.1/configuration.md +++ /dev/null @@ -1,790 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -HarperDB is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the operations API root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -All available configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase: `operationsApi`. - -To change a configuration value edit the `harperdb-config.yaml` file and save any changes. HarperDB must be restarted for changes to take effect. - -Alternately, configuration can be changed via environment and/or command line variables or via the API. To access lower level elements, use underscores to append parent/child elements (when used this way elements are case insensitive): - - - Environment variables: `OPERATIONSAPI_NETWORK_PORT=9925` - - Command line variables: `--OPERATIONSAPI_NETWORK_PORT 9925` - - Calling `set_configuration` through the API: `operationsApi_network_port: 9925` - ---- - -## Configuration Options - -### `clustering` - -The `clustering` section configures the clustering engine, this is used to replicate data between instances of HarperDB. 
- -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the HarperDB mesh network and discovery service. - -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -
- -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represents the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -
- -
- -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -
- -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. - -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -
- -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -
- -
- -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: <ROOTPATH>/clustering/leaf - -The directory where all the streams are kept. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your HarperDB cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: <ROOTPATH>/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: <ROOTPATH>/keys/ca.pem - -Path to the certificate authority file. 
- -`privateKey` - _Type_: string; _Default_: <ROOTPATH>/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: true - -When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special HarperDB user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `customFunctions` - -The `customFunctions` section configures HarperDB Custom Functions. - -`enabled` - _Type_: boolean; _Default_: true - -Enable the Custom Function server or not. 
- -```yaml -customFunctions: - enabled: true -``` - -`customFunctions.network` - -```yaml -customFunctions: - network: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - https: false - keepAliveTimeout: 5000 - port: 9926 - timeout: 120000 -``` - -
- -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`https` - _Type_: boolean; _Default_: false - -Enables HTTPS on the Custom Functions API. This requires a valid certificate and key. If `false`, Custom Functions will run using standard HTTP. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the Custom Functions server. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -
- -`nodeEnv` - _Type_: string; _Default_: production - -Allows you to specify the node environment in which application will run. - -```yaml -customFunctions: - nodeEnv: production -``` - -- `production` native node logging is kept to a minimum; more caching to optimize performance. This is the default value. -- `development` more native node logging; less caching. - -`root` - _Type_: string; _Default_: <ROOTPATH>/custom_functions - -The path to the folder containing Custom Function files. - -```yaml -customFunctions: - root: ~/hdb/custom_functions -``` - -`tls` -Transport Layer Security - -```yaml -customFunctions: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: <ROOTPATH>/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: <ROOTPATH>/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: <ROOTPATH>/keys/privateKey.pem - -Path to the private key file. - ---- - -### `ipc` - -The `ipc` section configures the HarperDB Inter-Process Communication interface. - -```yaml -ipc: - network: - port: 9383 -``` - -`port` - _Type_: integer; _Default_: 9383 - -The port the IPC server runs on. The default is `9383`. - ---- - -### `localStudio` - -The `localStudio` section configures the local HarperDB Studio, a simplified GUI for HarperDB hosted on the server. A more comprehensive GUI is hosted by HarperDB at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or HarperDB Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enabled the local studio or not. - -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures HarperDB logging across all HarperDB functionality. 
HarperDB leverages pm2 for logging. Each process group gets their own log file which is located in `logging.root`. - -`auditLog` - _Type_: boolean; _Default_: false - -Enabled table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether or not to log to a file. - -```yaml -logging: - file: true -``` - -`level` - _Type_: string; _Default_: error - -Control the verbosity of logs. - -```yaml -logging: - level: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`. - -`root` - _Type_: string; _Default_: <ROOTPATH>/log - -The path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -**_Note:_** `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -
- -`enabled` - _Type_: boolean; _Default_: false - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: <ROOTPATH>/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -
- -`stdStreams` - _Type_: boolean; _Default_: false - -Log HarperDB logs to the standard output and error streams. The `operationsApi.foreground` flag must be enabled in order to receive the stream. - -```yaml -logging: - stdStreams: false -``` - ---- - -### `operationsApi` - -The `operationsApi` section configures the HarperDB Operations API. - -`authentication` - -```yaml -operationsApi: - authentication: - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -
- -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 30d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -
- -`foreground` - _Type_: boolean; _Default_: false - -Determines whether or not HarperDB runs in the foreground. - -```yaml -operationsApi: - foreground: false -``` - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - https: false - keepAliveTimeout: 5000 - port: 9925 - timeout: 120000 -``` - -
- -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`https` - _Type_: boolean; _Default_: false - -Enable HTTPS on the HarperDB operations endpoint. This requires a valid certificate and key. If `false`, HarperDB will run using standard HTTP. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the HarperDB operations API interface will listen on. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -
- -`nodeEnv` - _Type_: string; _Default_: production - -Allows you to specify the node environment in which application will run. - -```yaml -operationsApi: - nodeEnv: production -``` - -- `production` native node logging is kept to a minimum; more caching to optimize performance. This is the default value. -- `development` more native node logging; less caching. - -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: <ROOTPATH>/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: <ROOTPATH>/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: <ROOTPATH>/keys/privateKey.pem - -Path to the private key file. - ---- - -### `http` - -`threads` - _Type_: number; _Default_: One less than the number of logical cores/ processors - -The `threads` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because HarperDB does have other threads at work), assuming HarperDB is the main service on a server. - -```yaml -http: - threads: 11 -``` - -#### Session Affinity - -`sessionAffinity` - _Type_: string; _Default_: null - -HarperDB is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. 
This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using HarperDB to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using HarperDB behind a proxy server or application server, all the remote ip addresses will be the same and HarperDB will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The HarperDB database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the HarperDB application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. 
However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: false - -The `compression` option enables compression of records in the database. This can be helpful for very large databases in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. - -```yaml -storage: - compression: false -``` - -`noReadAhead` - _Type_: boolean; _Default_: true - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization, except in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/schema` - -The `path` configuration sets where all database files should reside. - -```yaml -storage: - path: /users/harperdb/storage -``` - -**_Note:_** This configuration applies to all database files, which includes system tables that are used internally by HarperDB. 
For this reason, if you wish to use a non-default `path` value, you must move any existing schemas into your `path` location. Existing schemas are likely to include the system schema, which can be found at `/schema/system`. - ---- - -### `schemas` - -The `schemas` section is an optional configuration that can be used to define where database files should reside down to the table level. -

This configuration should be set before the schema and table have been created. -

The configuration will not create the directories in the path; that must be done by the user. -
- -To define where a schema and all its tables should reside use the name of your schema and the `path` parameter. - -```yaml -schemas: - nameOfSchema: - path: /path/to/schema -``` - -To define where specific tables within a schema should reside use the name of your schema, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -schemas: - nameOfSchema: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -schemas: - nameOfSchema: - auditPath: /path/to/schema -``` - -
- -**Setting the schemas section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the schemas section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. - -Using command line variables: - -```bash ---SCHEMAS [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -SCHEMAS=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "schemas": [ - { - "nameOfSchema": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` diff --git a/versioned_docs/version-4.1/custom-functions/create-project.md b/versioned_docs/version-4.1/custom-functions/create-project.md deleted file mode 100644 index 8adc5bea..00000000 --- a/versioned_docs/version-4.1/custom-functions/create-project.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Create a Project ---- - -# Create a Project - -To create a project using our web-based GUI, HarperDB Studio, checkout out how to manage Custom Functions [here](../harperdb-studio/manage-functions). - -Otherwise, to create a project, you have the following options: - -1. **Use the add_custom_function_project operation** - - This operation creates a new project folder, and populates it with templates for the routes, helpers, and static subfolders. - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -1. **Clone our public gitHub project template** - - _This requires a local installation. Remove the .git directory for a clean slate of git history._ - -```bash -> git clone https://github.com/HarperDB/harperdb-custom-functions-template.git ~/hdb/custom_functions/dogs -``` - -1. 
**Create a project folder in your Custom Functions root directory** and **initialize** - - _This requires a local installation._ - -```bash -> mkdir ~/hdb/custom_functions/dogs -``` - -```bash -> npm init -``` diff --git a/versioned_docs/version-4.1/custom-functions/custom-functions-operations.md b/versioned_docs/version-4.1/custom-functions/custom-functions-operations.md deleted file mode 100644 index 11cecde5..00000000 --- a/versioned_docs/version-4.1/custom-functions/custom-functions-operations.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Custom Functions Operations ---- - -# Custom Functions Operations - -One way to manage Custom Functions is through [HarperDB Studio](../harperdb-studio/). It performs all the necessary operations automatically. To get started, navigate to your instance in HarperDB Studio and click the subnav link for "functions". If you have not yet enabled Custom Functions, it will walk you through the process. Once configuration is complete, you can manage and deploy Custom Functions in minutes. - -HarperDB Studio manages your Custom Functions using nine HarperDB operations. You may view these operations within our [API Docs](https://api.harperdb.io/). A brief overview of each of the operations is below: - -- **custom_functions_status** - - Returns the state of the Custom Functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -- **get_custom_functions** - - Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the **routes** and **helpers** directories, and the total file count in the **static** folder. - -- **get_custom_function** - - Returns the content of the specified file as text. HarperDB Studio uses this call to render the file content in its built-in code editor. - -- **set_custom_function** - - Updates the content of the specified file. 
HarperDB Studio uses this call to save any changes made through its built-in code editor. - -- **drop_custom_function** - - Deletes the specified file. - -- **add_custom_function_project** - - Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). - -- **drop_custom_function_project** - - Deletes the specified project folder and all of its contents. - -- **package_custom_function_project** - - Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns that string to the user. - -- **deploy_custom_function_project** - - Takes the output of package_custom_function_project, decodes the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. diff --git a/versioned_docs/version-4.1/custom-functions/debugging-custom-function.md b/versioned_docs/version-4.1/custom-functions/debugging-custom-function.md deleted file mode 100644 index e57dde0b..00000000 --- a/versioned_docs/version-4.1/custom-functions/debugging-custom-function.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Debugging a Custom Function ---- - -# Debugging a Custom Function - -HarperDB Custom Functions projects are managed by HarperDB’s process manager. As such, it may seem more difficult to debug Custom Functions than your standard project. The goal of this document is to provide best practices and recommendations for debugging your Custom Function. - -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use HarperDB's logging facilities, so you aren't logging to the console. 
The [HarperDB Custom Functions template](https://github.com/HarperDB/harperdb-custom-functions-template) includes the HarperDB logger module in the primary function parameters with the name `logger`. This logger can be used to output messages directly to the HarperDB log using standardized logging level functions, described below. The log level can be set in the [HarperDB Configuration File](../configuration). - -HarperDB Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For debugging purposes, it is recommended to use `notify` as these messages will appear in the log regardless of log level configured. - -## Viewing the Log - -The HarperDB Log can be found on the [Studio Status page](../harperdb-studio/instance-metrics) or in the local Custom Functions log file, `/log/custom_functions.log`. Additionally, you can use the [`read_log` operation](https://api.harperdb.io/#7f718dd1-afa5-49ce-bc0c-564e17b1c9cf) to query the HarperDB log. - -### Example 1: Execute Query and Log Results - -This example performs a SQL query in HarperDB and logs the result. This example utilizes the `logger.notify` function to log the stringified version of the result. If an error occurs, it will output the error using `logger.error` and return the error. 
- -```javascript -server.route({ - url: '/', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: 'SELECT * FROM dev.dog ORDER BY dog_name', - }; - - try { - let result = await hdbCore.requestWithoutAuthentication(request); - logger.notify(`Query Result: ${JSON.stringify(result)}`); - return result; - } catch (e) { - logger.error(`Query Error: ${e}`); - return e; - } - }, -}); -``` - -### Example 2: Execute Multiple Queries and Log Activity - -This example performs two SQL queries in HarperDB with logging throughout to describe what is happening. This example utilizes the `logger.notify` function to log the stringified version of the operation and the result of each query. If an error occurs, it will output the error using `logger.error` and return the error. - -```javascript -server.route({ - url: '/example', - method: 'GET', - handler: async (request) => { - logger.notify('/example called!'); - const results = []; - - request.body = { - operation: 'sql', - sql: 'SELECT * FROM dev.dog WHERE id = 1', - }; - logger.notify(`Query 1 Operation: ${JSON.stringify(request.body)}`); - try { - let result = await hdbCore.requestWithoutAuthentication(request); - logger.notify(`Query 1: ${JSON.stringify(result)}`); - results.push(result); - } catch (e) { - logger.error(`Query 1: ${e}`); - return e; - } - - request.body = { - operation: 'sql', - sql: 'SELECT * FROM dev.dog WHERE id = 2', - }; - logger.notify(`Query 2 Operation: ${JSON.stringify(request.body)}`); - try { - let result = await hdbCore.requestWithoutAuthentication(request); - logger.notify(`Query 2: ${JSON.stringify(result)}`); - results.push(result); - } catch (e) { - logger.error(`Query 2: ${e}`); - return e; - } - - logger.notify('/example complete!'); - return results; - }, -}); -``` diff --git a/versioned_docs/version-4.1/custom-functions/define-helpers.md b/versioned_docs/version-4.1/custom-functions/define-helpers.md deleted file mode 100644 index 5f8df3e8..00000000 
--- a/versioned_docs/version-4.1/custom-functions/define-helpers.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: Define Helpers ---- - -# Define Helpers - -Helpers are functions for use within your routes. You may want to use the same helper in multiple route files, so this allows you to write it once, and include it wherever you need it. - -- To use your helpers, they must be exported from your helper file. Please use any standard export mechanisms available for your module system. We like ESM, ECMAScript Modules. Our example below exports using `module.exports`. - -- You must import the helper module into the file that needs access to the exported functions. With ESM, you'd use a `require` statement. See [this example](./define-routes#custom-prevalidation-hooks) in Define Routes. - -Below is code from the customValidation helper that is referenced in [Define Routes](./define-routes). It takes the request and the logger method from the route declaration, and makes a call to an external API to validate the headers using fetch. The API in this example is just returning a list of ToDos, but it could easily be replaced with a call to a real authentication service. 
- -```javascript -const customValidation = async (request, logger) => { - let response = await fetch('https://jsonplaceholder.typicode.com/todos/1', { - headers: { authorization: request.headers.authorization }, - }); - let result = await response.json(); - - /* - * throw an authentication error based on the response body or statusCode - */ - if (result.error) { - const errorString = result.error || 'Sorry, there was an error authenticating your request'; - logger.error(errorString); - throw new Error(errorString); - } - return request; -}; - -module.exports = customValidation; -``` diff --git a/versioned_docs/version-4.1/custom-functions/define-routes.md b/versioned_docs/version-4.1/custom-functions/define-routes.md deleted file mode 100644 index ca64384c..00000000 --- a/versioned_docs/version-4.1/custom-functions/define-routes.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: Define Routes ---- - -# Define Routes - -HarperDB’s Custom Functions is built on top of [Fastify](https://www.fastify.io/), so our route definitions follow their specifications. Below is a very simple example of a route declaration. - -Route URLs are resolved in the following manner: - -- [**Instance URL**]:[**Custom Functions Port**]/[**Project Name**]/[**Route URL**] - -- The route below, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to HarperDB. The same result could have been achieved by hitting the core HarperDB API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. 
- -```javascript -module.exports = async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against HarperDB, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -module.exports = async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: (request) => { - request.body= { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to HarperDB- the exact same result could have been achieved by hitting the core HarperDB API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. 
- -Below is an example of a route that uses a custom validation hook: - -```javascript -const customValidation = require('../helpers/customValidation'); - -module.exports = async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Define Helpers](./define-helpers). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against HarperDB directly, by passing the standard Operations API. - -- **preValidation** - - This takes the authorization header from the inbound request and executes the same authentication as the standard HarperDB Operations API. It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with HarperDB using the operations API. The `request.body` should contain a standard HarperDB operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against HarperDB without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. 
User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the Custom Functions log file, custom_functions.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) - -- logger.debug(‘This should only fire once’) - -- logger.warn(‘This should never ever fire’) - -- logger.error(‘This did not go well’) - -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.1/custom-functions/example-projects.md b/versioned_docs/version-4.1/custom-functions/example-projects.md deleted file mode 100644 index bfa50ab5..00000000 --- a/versioned_docs/version-4.1/custom-functions/example-projects.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Example Projects ---- - -# Example Projects - -**Library of example projects and tutorials using Custom Functions:** - -- [Authorization in HarperDB using Okta Customer Identity Cloud](https://www.harperdb.io/post/authorization-in-harperdb-using-okta-customer-identity-cloud), by Yitaek Hwang - -- [How to Speed Up your Applications by Caching at the Edge with HarperDB](https://dev.to/doabledanny/how-to-speed-up-your-applications-by-caching-at-the-edge-with-harperdb-3o2l), by Danny Adams - -- [OAuth Authentication in HarperDB using Auth0 & Node.js](https://www.harperdb.io/post/oauth-authentication-in-harperdb-using-auth0-and-node-js), by Lucas Santos - -- [How To Create a CRUD API with Next.js & HarperDB Custom Functions](https://www.harperdb.io/post/create-a-crud-api-w-next-js-harperdb), by Colby Fayock - -- [Build a Dynamic REST API with Custom 
Functions](https://harperdb.io/blog/build-a-dynamic-rest-api-with-custom-functions/), by Terra Roush - -- [How to use HarperDB Custom Functions to Build your Entire Backend](https://dev.to/andrewbaisden/how-to-use-harperdb-custom-functions-to-build-your-entire-backend-a2m), by Andrew Baisden - -- [Using TensorFlowJS & HarperDB Custom Functions for Machine Learning](https://harperdb.io/blog/using-tensorflowjs-harperdb-for-machine-learning/), by Kevin Ashcraft - -- [Build & Deploy a Fitness App with Python & HarperDB](https://www.youtube.com/watch?v=KMkmA4i2FQc), by Patrick Löber - -- [Create a Discord Slash Bot using HarperDB Custom Functions](https://geekysrm.hashnode.dev/discord-slash-bot-with-harperdb-custom-functions), by Soumya Ranjan Mohanty - -- [How I used HarperDB Custom Functions to Build a Web App for my Newsletter](https://blog.hrithwik.me/how-i-used-harperdb-custom-functions-to-build-a-web-app-for-my-newsletter), by Hrithwik Bharadwaj - -- [How I used HarperDB Custom Functions and Recharts to create Dashboard](https://blog.greenroots.info/how-to-create-dashboard-with-harperdb-custom-functions-and-recharts), by Tapas Adhikary - -- [How To Use HarperDB Custom Functions With Your React App](https://dev.to/tyaga001/how-to-use-harperdb-custom-functions-with-your-react-app-2c43), by Ankur Tyagi - -- [Build a Web App Using HarperDB’s Custom Functions](https://www.youtube.com/watch?v=rz6prItVJZU), livestream by Jaxon Repp - -- [How to Web Scrape Using Python, Snscrape & Custom Functions](https://hackernoon.com/how-to-web-scrape-using-python-snscrape-and-harperdb), by Davis David - -- [What’s the Big Deal w/ Custom Functions](https://rss.com/podcasts/harperdb-select-star/278933/), Select\* Podcast diff --git a/versioned_docs/version-4.1/custom-functions/host-static.md b/versioned_docs/version-4.1/custom-functions/host-static.md deleted file mode 100644 index cae874b3..00000000 --- a/versioned_docs/version-4.1/custom-functions/host-static.md +++ /dev/null @@ 
-1,21 +0,0 @@ ---- -title: Host A Static Web UI ---- - -# Host A Static Web UI - -The [@fastify/static](https://github.com/fastify/fastify-static) module can be utilized to serve static files. - -Install the module in your project by running `npm i @fastify/static` from inside your project directory. - -Register `@fastify/static` with the server and set `root` to the absolute path of the directory that contains the static files to serve. - -For further information on how to send specific files see the [@fastify/static](https://github.com/fastify/fastify-static) docs. - -```javascript -module.exports = async (server, { hdbCore, logger }) => { - server.register(require('@fastify/static'), { - root: path.join(__dirname, 'public'), - }); -}; -``` diff --git a/versioned_docs/version-4.1/custom-functions/index.md b/versioned_docs/version-4.1/custom-functions/index.md deleted file mode 100644 index d5c7572f..00000000 --- a/versioned_docs/version-4.1/custom-functions/index.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -Custom functions are a key part of building a complete HarperDB application. It is highly recommended that you use Custom Functions as the primary mechanism for your application to access your HarperDB database. Using Custom Functions gives you complete control over the accessible endpoints, how users are authenticated and authorized, what data is accessed from the database, and how it is aggregated and returned to users. 
- -- Add your own API endpoints to a standalone API server inside HarperDB - -- Use HarperDB Core methods to interact with your data at lightning speed - -- Custom Functions are powered by Fastify, so they’re extremely flexible - -- Manage in HarperDB Studio, or use your own IDE and Version Management System - -- Distribute your Custom Functions to all your HarperDB instances with a single click - ---- - -- [Requirements and Definitions](custom-functions/requirements-definitions) - -- [Create A Project](custom-functions/create-project) - -- [Define Routes](custom-functions/define-routes) - -- [Define Helpers](custom-functions/define-helpers) - -- [Host a Static UI](custom-functions/host-static) diff --git a/versioned_docs/version-4.1/custom-functions/requirements-definitions.md b/versioned_docs/version-4.1/custom-functions/requirements-definitions.md deleted file mode 100644 index 5fa59fdb..00000000 --- a/versioned_docs/version-4.1/custom-functions/requirements-definitions.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Requirements And Definitions ---- - -# Requirements And Definitions - -Before you get started with Custom Functions, here’s a primer on the basic configuration and the structure of a Custom Functions Project. - -## Configuration - -Custom Functions are configured in the harperdb-config.yaml file located in the operations API root directory (by default this is a directory named `hdb` located in the home directory of the current user). Below is a view of the Custom Functions' section of the config YAML file, plus descriptions of important Custom Functions settings. 
- -```yaml -customFunctions: - enabled: true - network: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - https: false - keepAliveTimeout: 5000 - port: 9926 - timeout: 120000 - nodeEnv: production - root: ~/hdb/custom_functions - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -- **`enabled`** - A boolean value that tells HarperDB to start the Custom Functions server. Set it to **true** to enable custom functions and **false** to disable. `enabled` is `true` by default. - -- **`network.port`** - This is the port HarperDB will use to start a standalone Fastify Server dedicated to serving your Custom Functions’ routes. - -- **`root`** - This is the root directory where your Custom Functions projects and their files will live. By default, it’s in your \, but you can locate it anywhere--in a developer folder next to your other development projects, for example. - -_Please visit our [configuration docs](../configuration) for a more comprehensive look at these settings._ - -## Project Structure - -**project folder** - -The name of the folder that holds your project files serves as the root prefix for all the routes you create. All routes created in the **dogs** project folder will have a URL like this: **[https://my-server-url.com:9926/dogs/my/route](https://my-server-url.com:9926/dogs/my/route)**. As such, it’s important that any project folders you create avoid any characters that aren’t URL-friendly. You should avoid URL delimiters in your folder names. - -**/routes folder** - -Files in the **routes** folder define the requests that your Custom Functions server will handle. They are [standard Fastify route declarations](https://www.fastify.io/docs/latest/Reference/Routes/), so if you’re familiar with them, you should be up and running in no time. The default components for a route are the url, method, preValidation, and handler. 
- -```javascript -module.exports = async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -**/helpers folder** - -These files are JavaScript modules that you can use in your handlers, or for custom `preValidation` hooks. Examples include calls to third party Authentication services, filters for results of calls to HarperDB, and custom error responses. As modules, you can use standard import and export functionality. - -```javascript -'use strict'; - -const dbFilter = (databaseResultsArray) => databaseResultsArray.filter((result) => result.showToApi === true); - -module.exports = dbFilter; -``` - -**/static folder** - -If you’d like to serve your visitors a static website, you can place the html and supporting files into a directory called **static**. The directory must have an **index.html** file, and can have as many supporting resources as are necessary in whatever subfolder structure you prefer within that **static** directory. diff --git a/versioned_docs/version-4.1/custom-functions/restarting-server.md b/versioned_docs/version-4.1/custom-functions/restarting-server.md deleted file mode 100644 index 16fd9771..00000000 --- a/versioned_docs/version-4.1/custom-functions/restarting-server.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Restarting the Server ---- - -# Restarting the Server - -One way to manage Custom Functions is through [HarperDB Studio](../harperdb-studio/). It performs all the necessary operations automatically. To get started, navigate to your instance in HarperDB Studio and click the subnav link for "functions". If you have not yet enabled Custom Functions, it will walk you through the process. Once configuration is complete, you can manage and deploy Custom Functions in minutes. - -For any changes made to your routes, helpers, or projects, you’ll need to restart the Custom Functions server to see them take effect. 
HarperDB Studio does this automatically whenever you create or delete a project, or add, edit, or delete a route or helper.
diff --git a/versioned_docs/version-4.1/getting-started/getting-started.md b/versioned_docs/version-4.1/getting-started/getting-started.md deleted file mode 100644 index c4414607..00000000 --- a/versioned_docs/version-4.1/getting-started/getting-started.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Getting Started ---- - -# Getting Started - -Getting started with HarperDB is easy and fast. - -The quickest way to get up and running with HarperDB is with HarperDB Cloud, our database-as-a-service offering, which this guide will utilize. - -### Set Up a HarperDB Instance - -Before you can start using HarperDB you need to set up an instance. Note, if you would prefer to install HarperDB locally, [check out the installation guides including Linux, Mac, and many other options](../4.1/install-harperdb). - -1. [Sign up for the HarperDB Studio](https://studio.harperdb.io/sign-up) -1. [Create a new HarperDB Cloud instance](../4.1/harperdb-studio/instances#create-a-new-instance) - -> HarperDB Cloud instance provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -### Using the HarperDB Studio - -Now that you have a HarperDB instance, you can do pretty much everything you’d like through the Studio. This section links to appropriate articles to get you started interacting with your data. - -1. [Create a schema](../4.1/harperdb-studio/manage-schemas-browse-data#create-a-schema) -1. [Create a table](../4.1/harperdb-studio/manage-schemas-browse-data#create-a-table) -1. [Add a record](../4.1/harperdb-studio/manage-schemas-browse-data#add-a-record) -1. [Load CSV data](../4.1/harperdb-studio/manage-schemas-browse-data#load-csv-data) (Here’s a sample CSV of the HarperDB team’s dogs) -1. [Query data via SQL](../4.1/harperdb-studio/query-instance-data) - -### Using the HarperDB API - -Complete HarperDB API documentation is available at api.harperdb.io. 
The HarperDB Studio features an example code builder that generates API calls in the programming language of your choice. For example purposes, a basic cURL command is shown below to create a schema called dev. - -``` -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` - -Breaking it down, there are only a few requirements for interacting with HarperDB: - -- Using the HTTP POST method. -- Providing the URL of the HarperDB instance. -- Providing the Authorization header (more on using Basic authentication). -- Providing the Content-Type header. -- Providing a JSON body with the desired operation and any additional operation properties (shown in the --data-raw parameter). This is the only parameter that needs to be changed to execute alternative operations on HarperDB. - -### Video Tutorials - -[HarperDB video tutorials are available within the HarperDB Studio](../4.1/harperdb-studio/resources#video-tutorials). HarperDB and the HarperDB Studio are constantly changing, as such, there may be small discrepancies in UI/UX. diff --git a/versioned_docs/version-4.1/harperdb-cli.md b/versioned_docs/version-4.1/harperdb-cli.md deleted file mode 100644 index 0e8e5033..00000000 --- a/versioned_docs/version-4.1/harperdb-cli.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: HarperDB CLI ---- - -# HarperDB CLI - -The HarperDB command line interface (CLI) is used to administer [self-installed HarperDB instances](./install-harperdb/). 
- -## Installing HarperDB - -To install HarperDB with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, HarperDB installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. - -#### Environment Variables - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -#### Command Line Arguments - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -## Starting HarperDB - -To start HarperDB after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -## Stopping HarperDB - -To stop HarperDB once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -## Restarting HarperDB - -To restart HarperDB once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -## Managing HarperDB Service(s) - -The following commands are used to start, restart, or stop one or more HarperDB service without restarting the full application: - -```bash -harperdb start --service harperdb,"custom functions",ipc -harperdb stop --service harperdb -harperdb restart --service "custom functions" -``` - -The following services are managed via the above commands: - -- HarperDB -- Custom Functions -- IPC -- Clustering - ---- - -## Getting the HarperDB Version - -To check the version of HarperDB that is installed run the following command: - -```bash -harperdb version 
-``` - -## Get all available CLI commands - -To display all available HarperDB CLI commands along with a brief description run: - -```bash -harperdb help -``` - -## Get the status of HarperDB and clustering - -To display the status of the HarperDB process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - -## Backups - -HarperDB uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that HarperDB maintains safety of database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a HarperDB database. Database files are stored in the hdb/schemas directory (organized schema directories). As long as the snapshot is an atomic snapshot of these database files, the data can be copied/movied back into the schemas directory to restore a previous backup (with HarperDB shut down) , and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. diff --git a/versioned_docs/version-4.1/harperdb-cloud/alarms.md b/versioned_docs/version-4.1/harperdb-cloud/alarms.md deleted file mode 100644 index 11530aac..00000000 --- a/versioned_docs/version-4.1/harperdb-cloud/alarms.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: HarperDB Cloud Alarms ---- - -# HarperDB Cloud Alarms - -HarperDB Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../harperdb-studio/instance-configuration) page. 
The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. - -- **Threshold**: Definition of the alarm threshold. - -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. - -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. - -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | ----------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../harperdb-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../harperdb-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../harperdb-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.1/harperdb-cloud/index.md b/versioned_docs/version-4.1/harperdb-cloud/index.md deleted file mode 100644 index 28e70b01..00000000 --- a/versioned_docs/version-4.1/harperdb-cloud/index.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: HarperDB Cloud ---- - -# HarperDB Cloud - -HarperDB Cloud is the easiest way to test drive HarperDB, it’s HarperDB-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. HarperDB Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. 
diff --git a/versioned_docs/version-4.1/harperdb-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.1/harperdb-cloud/instance-size-hardware-specs.md deleted file mode 100644 index ae3042b8..00000000 --- a/versioned_docs/version-4.1/harperdb-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: HarperDB Cloud Instance Size Hardware Specs ---- - -# HarperDB Cloud Instance Size Hardware Specs - -While HarperDB Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. - -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.nano | 0.5 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). 
diff --git a/versioned_docs/version-4.1/harperdb-cloud/iops-impact.md b/versioned_docs/version-4.1/harperdb-cloud/iops-impact.md deleted file mode 100644 index 99be73f7..00000000 --- a/versioned_docs/version-4.1/harperdb-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -HarperDB, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running HarperDB. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that HarperDB performs as expected. - -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers HarperDB Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage is then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## HarperDB Cloud Storage - -HarperDB Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. 
- -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all HarperDB Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for HarperDB Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact HarperDB Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In case of IoT sensors where data collection will be sustained high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collection 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. 
HarperDB utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case is more transactional systems without the requirement for high performance load. A good rule to follow is that any HarperDB operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to HarperDB’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, HarperDB should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.1/harperdb-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.1/harperdb-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index 3ca1cdde..00000000 --- a/versioned_docs/version-4.1/harperdb-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Verizon 5G Wavelength Instances ---- - -# Verizon 5G Wavelength Instances - -These instances are only accessible from the Verizon network. When accessing your HarperDB instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -HarperDB on Verizon 5G Wavelength brings HarperDB closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from HarperDB to the client. 
- -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -HarperDB 5G Wavelength Instance Specs -While HarperDB 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. - -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## HarperDB 5G Wavelength Storage - -HarperDB 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher it’s baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of HarperDB, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger HarperDB volume. Learn more about the impact of IOPS on performance here. - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.1/harperdb-studio/create-account.md b/versioned_docs/version-4.1/harperdb-studio/create-account.md deleted file mode 100644 index 3d146bb6..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [HarperDB Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your HarperDB Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -2. Review the Privacy Policy and Terms of Service. -3. Click the sign up for free button. -4. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -5. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. diff --git a/versioned_docs/version-4.1/harperdb-studio/enable-mixed-content.md b/versioned_docs/version-4.1/harperdb-studio/enable-mixed-content.md deleted file mode 100644 index 5a198b91..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the HarperDB Studio to HarperDB Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. 
Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). diff --git a/versioned_docs/version-4.1/harperdb-studio/index.md b/versioned_docs/version-4.1/harperdb-studio/index.md deleted file mode 100644 index d3cdbaeb..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: HarperDB Studio ---- - -# HarperDB Studio - -HarperDB Studio is the web-based GUI for HarperDB. Studio enables you to administer, navigate, and monitor all of your HarperDB instances in a simple, user friendly interface without any knowledge of the underlying HarperDB API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - ---- - -## How does Studio Work? - -While HarperDB Studio is web based and hosted by us, all database interactions are performed on the HarperDB instance the studio is connected to. The HarperDB Studio loads in your browser, at which point you login to your HarperDB instances. Credentials are stored in your browser cache and are not transmitted back to HarperDB. All database interactions are made via the HarperDB Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -HarperDB Studio enables users to manage both HarperDB Cloud instances and privately hosted instances all from a single UI. All HarperDB instances feature identical behavior whether they are hosted by us or by you. 
diff --git a/versioned_docs/version-4.1/harperdb-studio/instance-configuration.md b/versioned_docs/version-4.1/harperdb-studio/instance-configuration.md deleted file mode 100644 index 64a802af..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/instance-configuration.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -HarperDB instance configuration can be viewed and managed directly through the HarperDB Studio. HarperDB Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. User-installed instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click config in the instance control bar. - -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in HarperDB database user_ - -- Created Date (HarperDB Cloud only) - -- Region (HarperDB Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (HarperDB Cloud only) - -- Disk IOPS (HarperDB Cloud only) - -## Update Instance RAM - -HarperDB Cloud instance size and user-installed instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. 
- -Note: For HarperDB Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if HarperDB Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The HarperDB Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. 
- - - If you do have a credit card associated, you will be presented with the updated billing information. - - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The HarperDB instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -2. The instance will begin deleting immediately. - -## Restart Instance - -The HarperDB Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -2. The instance will begin restarting immediately. 
diff --git a/versioned_docs/version-4.1/harperdb-studio/instance-example-code.md b/versioned_docs/version-4.1/harperdb-studio/instance-example-code.md deleted file mode 100644 index d5805510..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/instance-example-code.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Instance Example Code ---- - -# Instance Example Code - -Example code prepopulated with the instance URL and authorization token for the logged in database user can be found on the **example code** page of the HarperDB Studio. Code samples are generated based on the HarperDB API Documentation Postman collection. Code samples accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **example code** in the instance control bar. - -5. Select the appropriate **category** from the left navigation. - -6. Select the appropriate **operation** from the left navigation. - -7. Select your desired language/variant from the **Choose Programming Language** dropdown. - -8. Copy code from the sample code panel using the copy icon. - -## Supported Languages - -Sample code uses two identifiers: **language** and **variant**. - -- **language** is the programming language that the sample code is generated in. - -- **variant** is the methodology or library used by the language to send HarperDB requests. 
- -The list of available language/variants are as follows: - -| Language | Variant | -| ----------- | ------------- | -| C# | RestSharp | -| cURL | cURL | -| Go | Native | -| HTTP | HTTP | -| Java | OkHttp | -| Java | Unirest | -| JavaScript | Fetch | -| JavaScript | jQuery | -| JavaScript | XHR | -| NodeJs | Axios | -| NodeJs | Native | -| NodeJs | Request | -| NodeJs | Unirest | -| Objective-C | NSURLSession | -| OCaml | Cohttp | -| PHP | cURL | -| PHP | HTTP_Request2 | -| PowerShell | RestMethod | -| Python | http.client | -| Python | Requests | -| Ruby | Net:HTTP | -| Shell | Httpie | -| Shell | wget | -| Swift | URLSession | diff --git a/versioned_docs/version-4.1/harperdb-studio/instance-metrics.md b/versioned_docs/version-4.1/harperdb-studio/instance-metrics.md deleted file mode 100644 index 7a3f18e3..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/instance-metrics.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The HarperDB Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [HarperDB logs](../logging), and HarperDB Cloud alarms (if it is a cloud instance). 
- -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.1/harperdb-studio/instances.md b/versioned_docs/version-4.1/harperdb-studio/instances.md deleted file mode 100644 index ad629b8a..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/instances.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The HarperDB Studio allows you to administer all of your HarperDB instances in one place. HarperDB currently offers the following instance types: - -- **HarperDB Cloud Instance** - Managed installations of HarperDB, what we call HarperDB Cloud. -- **5G Wavelength Instance** - Managed installations of HarperDB running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **User-Installed Instance** - Any HarperDB installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. HarperDB stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the HarperDB instances using the standard [HarperDB API](https://api.harperdb.io/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. HarperDB Cloud and user-installed instances are listed together. - -## Create a New Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. 
Click the appropriate organization for the instance to be created under. -3. Click the **Create New HarperDB Cloud Instance + Register User-Installed Instance** card. -4. Select your desired Instance Type. -5. For a HarperDB Cloud Instance or a HarperDB 5G Wavelength Instance, click **Create HarperDB Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 2. Enter Instance Username - - _This is the username of the initial HarperDB instance super user._ - - 3. Enter Instance Password - - _This is the password of the initial HarperDB instance super user._ - - 2. Click **Instance Details** to move to the next page. - 3. Select Instance Specs - 1. Select Instance RAM - - _HarperDB Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance. More on instance specs._ - - 2. Select Storage Size - - _Each instance has a mounted storage volume where your HarperDB data will reside. Storage is provisioned based on space and IOPS. More on IOPS Impact on Performance._ - - 3. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 4. Click **Confirm Instance Details** to move to the next page. - 5. Review your Instance Details, if there is an error, use the back button to correct it. - 6. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 7. Click **Add Instance**. - 8. Your HarperDB Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. 
- -## Register User-Installed Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization for the instance to be created under. -3. Click the **Create New HarperDB Cloud Instance + Register User-Installed Instance** card. -4. Select **Register User-Installed Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 2. Enter Instance Username - - _The username of a HarperDB super user that is already configured in your HarperDB installation._ - - 3. Enter Instance Password - - _The password of a HarperDB super user that is already configured in your HarperDB installation._ - - 4. Enter Host - - _The host to access the HarperDB instance. For example, `harperdb.myhost.com` or `localhost`._ - - 5. Enter Port - - _The port to access the HarperDB instance. HarperDB defaults `9925`._ - - 6. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 2. Click **Instance Details** to move to the next page. - 3. Select Instance Specs - 1. Select Instance RAM - - _HarperDB instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 4. Click **Confirm Instance Details** to move to the next page. - 5. Review your Instance Details, if there is an error, use the back button to correct it. - 6. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 7. Click **Add Instance**. - 8. 
The HarperDB Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **HarperDB Cloud Instance** - This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **User-Installed Instance** - The instance will be removed from the HarperDB Studio only. This does not uninstall HarperDB from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Identify the proper instance card and click the trash can icon. -4. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -5. Click the **Do It** button. - -## Upgrade an Instance - -HarperDB instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Identify the proper instance card and click the lock icon. -4. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -4. 
Enter the database username. - - _The username of a HarperDB user that is already configured in your HarperDB instance._ - -5. Enter the database password. - - _The password of a HarperDB user that is already configured in your HarperDB instance._ - -6. Click **Log In**. diff --git a/versioned_docs/version-4.1/harperdb-studio/login-password-reset.md b/versioned_docs/version-4.1/harperdb-studio/login-password-reset.md deleted file mode 100644 index 163a6dee..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your HarperDB Studio Account - -To log into your existing HarperDB Studio account: - -1. Navigate to the [HarperDB Studio](https://studio.harperdb.io/). -2. Enter your email address. -3. Enter your password. -4. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the HarperDB Studio password reset page. -2. Enter your email address. -3. Click **send password reset email**. -4. If the account exists, you will receive an email with a temporary password. -5. Navigate back to the HarperDB Studio login page. -6. Enter your email address. -7. Enter your temporary password. -8. Click **sign in**. -9. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -10. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the HarperDB Studio profile page. -2. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -3. Click the **Update Password** button. 
diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-charts.md b/versioned_docs/version-4.1/harperdb-studio/manage-charts.md deleted file mode 100644 index c2b48a55..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-charts.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: Charts ---- - -# Charts - -The HarperDB Studio includes a charting feature within an instance. They are generated in real time based on your existing data and automatically refreshed every 15 seconds. Instance charts can be accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **charts** in the instance control bar. - -## Creating a New Chart - -Charts are generated based on SQL queries, therefore to build a new chart you first need to build a query. Instructions as follows (starting on the charts page described above): - -1. Click **query** in the instance control bar. -2. Enter the SQL query you would like to generate a chart from. - - _For example, using the dog demo data from the API Docs, we can get the average dog age per owner with the following query: `SELECT AVG(age) as avg_age, owner_name FROM dev.dog GROUP BY owner_name`._ - -3. Click **Execute**. - -4. Click **create chart** at the top right of the results table. - -5. Configure your chart. - 1. Choose chart type. - - _HarperDB Studio offers many standard charting options like line, bar, etc._ - - 2. Choose a data column. - - _This column will be used to plot the data point. Typically, this is the values being calculated in the `SELECT` statement. Depending on the chart type, you can select multiple data columns to display on a single chart._ - - 3. Depending on the chart type, you will need to select a grouping. - - _This could be labeled as x-axis, label, etc. 
This will be used to group the data, typically this is what you used in your **GROUP BY** clause._ - - 4. Enter a chart name. - - _Used for identification purposes and will be displayed at the top of the chart._ - - 5. Choose visible to all org users toggle. - - _Leaving this option off will limit chart visibility to just your HarperDB Studio user. Toggling it on will enable all users with this Organization to view this chart._ - - 6. Click **Add Chart**. - 7. The chart will now be visible on the **charts** page. - -The example query above, configured as a bar chart, results in the following chart: - -![Average Age per Owner Example](/img/v4.1/ave-age-per-owner-ex.png) - -## Downloading Charts - -HarperDB Studio charts can be downloaded in SVG, PNG, and CSV format. Instructions as follows (starting on the charts page described above): - -1. Identify the chart you would like to export. -2. Click the three bars icon. - -3. Select the appropriate download option. - -4. The Studio will generate the export and begin downloading immediately. - -## Delete a Chart - -Delete a chart as follows (starting on the charts page described above): - -1. Identify the chart you would like to delete. - -2. Click the X icon. - -3. Click the **confirm delete chart** button. - -4. The chart will be deleted. - -Deleting a chart that is visible to all Organization users will delete it for all users. diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-clustering.md b/versioned_docs/version-4.1/harperdb-studio/manage-clustering.md deleted file mode 100644 index 7872fc09..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-clustering.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Manage Clustering ---- - -# Manage Clustering - -HarperDB instance clustering and replication can be configured directly through the HarperDB Studio. It is recommended to read through the clustering documentation first to gain a strong understanding of HarperDB clustering behavior. 
- -All clustering configuration is handled through the **cluster** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **cluster** in the instance control bar. - -Note, the **cluster** page will only be available to super users. - ---- - -## Initial Configuration - -HarperDB instances do not have clustering configured by default. The HarperDB Studio will walk you through the initial configuration. Upon entering the **cluster** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. - -1. Create a cluster user, read more about this here: Clustering Users and Roles. - - Enter username. - - Enter password. - - Click **Create Cluster User**. - -2. Click **Set Cluster Node Name**. -3. Click **Enable Instance Clustering**. - -At this point the Studio will restart your HarperDB Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you a presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside of the Studio Organization that this instance manages a connection with. - -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all schemas and tables. 
- ---- - -## Connect an Instance - -HarperDB Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -2. Identify the instance you would like to connect from the **unconnected instances** panel. - -3. Click the plus icon next the appropriate instance. - -4. If configurations are correct, all schemas will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -HarperDB Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -2. Click the minus icon next the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing an channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, schema, and table for replication to be configured. - -2. For publish, click the toggle switch in the **publish** column. - -3. For subscribe, click the toggle switch in the **subscribe** column. 
diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-functions.md b/versioned_docs/version-4.1/harperdb-studio/manage-functions.md deleted file mode 100644 index 38bbf82e..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-functions.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -title: Manage Functions ---- - -# Manage Functions - -HarperDB Custom Functions are enabled by default and can be configured further through the HarperDB Studio. It is recommended to read through the Custom Functions documentation first to gain a strong understanding of HarperDB Custom Functions behavior. - -All Custom Functions configuration is handled through the **functions** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **functions** in the instance control bar. - -_Note, the **functions** page will only be available to super users._ - -## Manage Projects - -On the **functions** page of the HarperDB Studio you are presented with a functions management screen with the following properties: - -- **projects** - - Displays a list of Custom Functions projects residing on this instance. - -- **/project_name/routes** - - Only displayed if there is an existing project. Displays the routes files contained within the selected project. - -- **/project_name/helpers** - - Only displayed if there is an existing project. Displays the helper files contained within the selected project. - -- **/project_name/static** - - Only displayed if there is an existing project. Displays the static file count and a link to the static files contained within the selected project. Note, static files cannot currently be deployed through the Studio and must be deployed via the [HarperDB API](https://api.harperdb.io/) or manually to the server (not applicable with HarperDB Cloud). 
- -- **Root File Directory** - - Displays the root file directory where the Custom Functions projects reside on this instance. - -- **Custom Functions Server URL** - - Displays the base URL in which all Custom Functions are accessed for this instance. - -## Create a Project - -HarperDB Custom Functions Projects can be initialized with the following instructions. - -1. If this is your first project, skip this step. Click the plus icon next to the **projects** heading. - -2. Enter the project name in the text box located under the **projects** heading. - -3. Click the check mark icon next the appropriate instance. - -4. The Studio will take a few moments to provision a new project based on the [Custom Functions template](https://github.com/HarperDB/harperdb-custom-functions-template). - -5. The Custom Functions project is now created and ready to modify. - -## Modify a Project - -Custom Functions routes and helper functions can be modified directly through the Studio. From the **functions** page: - -1. Select the appropriate **project**. - -2. Select the appropriate **route** or **helper**. - -3. Modify the code with your desired changes. - -4. Click the save icon at the bottom right of the screen. - - _Note, saving modifications will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Create Additional Routes/Helpers - -To create an additional **route** to your Custom Functions project. From the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the plus icon to the right of the **routes** header. - -3. Enter the name of the new route in the textbox that appears. - -4. Click the check icon to create the new route. 
- - _Note, adding a route will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To create an additional **helper** to your Custom Functions project. From the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the plus icon to the right of the **helpers** header. - -3. Enter the name of the new helper in the textbox that appears. - -4. Click the check icon to create the new helper. - - _Note, adding a helper will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Delete a Project/Route/Helper - -To delete a Custom Functions project from the **functions** page: - -1. Click the minus icon to the right of the **projects** header. - -2. Click the red minus icon to the right of the Custom Functions project you would like to delete. - -3. Confirm deletion by clicking the red check icon. - - _Note, deleting a project will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To delete a Custom Functions _project route_ from the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the minus icon to the right of the **routes** header. - -3. Click the red minus icon to the right of the Custom Functions route you would like to delete. - -4. Confirm deletion by clicking the red check icon. - - _Note, deleting a route will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To delete a Custom Functions _project helper_ from the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the minus icon to the right of the **helper** header. - -3. 
Click the red minus icon to the right of the Custom Functions header you would like to delete. - -4. Confirm deletion by clicking the red check icon. - - _Note, deleting a header will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Deploy Custom Functions Project to Other Instances - -The HarperDB Studio provides the ability to deploy Custom Functions projects to additional HarperDB instances within the same Studio Organization. To deploy Custom Functions projects to additional instances, starting from the **functions** page: - -1. Select the **project** you would like to deploy. - -2. Click the **deploy** button at the top right. - -3. A list of instances (excluding the current instance) within the organization will be displayed in tabular with the following information: - - **Instance Name**: The name used to describe the instance. - - **Instance URL**: The URL used to access the instance. - - **CF Capable**: Describes if the instance version supports Custom Functions (yes/no). - - **CF Enabled**: Describes if Custom Functions are configured and enabled on the instance (yes/no). - - **Has Project**: Describes if the selected Custom Functions project has been previously deployed to the instance (yes/no). - - **Deploy**: Button used to deploy the project to the instance. - - **Remote**: Button used to remove the project from the instance. _Note, this will only be visible if the project has been previously deployed to the instance._ - -4. In the appropriate instance row, click the **deploy** button. 
- - _Note, deploying a project will restart the Custom Functions server on the HarperDB instance receiving the deployment and may result in up to 60 seconds of downtime for all Custom Functions._ diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-instance-roles.md b/versioned_docs/version-4.1/harperdb-studio/manage-instance-roles.md deleted file mode 100644 index dafc15a8..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-instance-roles.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -HarperDB users can be managed directly through the HarperDB Studio. It is recommended to read through the users & roles documentation to gain a strong understanding of how they operate. - -Instance role configuration is handled through the roles page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **rules** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -2. Enter the role name. - -3. Click the green check mark. - -4. Configure the role permissions in the role permission editing panel. 
- - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -5. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -2. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -3. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be remove if users are associated with it. - -1. Click the minus icon at the top right of the schemas section. - -2. Identify the appropriate role to delete and click the red minus sign in the same row. - -3. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-instance-users.md b/versioned_docs/version-4.1/harperdb-studio/manage-instance-users.md deleted file mode 100644 index 1fb3c2ee..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -HarperDB instance clustering and replication can be configured directly through the HarperDB Studio. It is recommended to read through the clustering documentation first to gain a strong understanding of HarperDB clustering behavior. - -Instance user configuration is handled through the **users** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **users** in the instance control bar. 
- -_Note, the **users** page will only be available to super users._ - -## Add a User - -HarperDB instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -2. Click **Add User**. - -## Edit a User - -HarperDB instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -2. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 2. Click **Update Password**. - -3. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 2. Click **Update Role**. - -4. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 2. Click **Delete User**. diff --git a/versioned_docs/version-4.1/harperdb-studio/manage-schemas-browse-data.md b/versioned_docs/version-4.1/harperdb-studio/manage-schemas-browse-data.md deleted file mode 100644 index 8b4061e3..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/manage-schemas-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Schemas / Browse Data ---- - -# Manage Schemas / Browse Data - -Manage instance schemas/tables and browse data in tabular format with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage schemas and tables, add new data, and more. - -## Manage Schemas and Tables - -#### Create a Schema - -1. Click the plus icon at the top right of the schemas section. -2. Enter the schema name. -3. 
Click the green check mark. - -#### Delete a Schema - -Deleting a schema is permanent and irreversible. Deleting a schema removes all tables and data within it. - -1. Click the minus icon at the top right of the schemas section. -2. Identify the appropriate schema to delete and click the red minus sign in the same row. -3. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired schema from the schemas section. -2. Click the plus icon at the top right of the tables section. -3. Enter the table name. -4. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -5. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired schema from the schemas section. -2. Click the minus icon at the top right of the tables section. -3. Identify the appropriate table to delete and click the red minus sign in the same row. -4. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the schema/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -2. This expands the search filters. -3. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -2. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 2. Click **Import From URL**. - 3. The CSV will load, and you will be redirected back to browse table data. -3. To upload a CSV file: - 1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 2. 
Navigate to your desired CSV file and select it. - 3. Click **Insert X Records**, where X is the number of records in your CSV. - 4. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -2. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -3. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -4. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -2. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -3. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -2. Click the **delete icon**. -3. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the schema/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. - -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. 
The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.1/harperdb-studio/organizations.md b/versioned_docs/version-4.1/harperdb-studio/organizations.md deleted file mode 100644 index 83f99150..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -HarperDB Studio organizations provide the ability to group HarperDB Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique HarperDB Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for HarperDB Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the HarperDB Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the **Create a New Organization** card. -3. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your HarperDB Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -4. 
Click Create Organization. - -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Identify the proper organization card and click the trash can icon. -3. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -4. Click the **Do It** button. - -## Manage Users - -HarperDB Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. In the **add user** box, enter the new user’s email address. -5. Click **Add User**. - -Users may or may not already be HarperDB Studio users when adding them to an organization. If the HarperDB Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a HarperDB Studio account, they will receive an email welcoming them to HarperDB Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled owner as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. 
Toggle the **Is Owner** switch to the desired status. - ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their HarperDB Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -6. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_HarperDB billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or user-installed instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -2. Click **Add Coupon**. -3. The coupon will then be available and displayed in the coupons panel. 
diff --git a/versioned_docs/version-4.1/harperdb-studio/query-instance-data.md b/versioned_docs/version-4.1/harperdb-studio/query-instance-data.md deleted file mode 100644 index 588f7d4f..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the HarperDB Studio with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **query** in the instance control bar. -5. Enter your SQL query in the SQL query window. -6. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. It will be loaded into the **sql query** input box. 
-- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The HarperDB Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.1/harperdb-studio/resources.md b/versioned_docs/version-4.1/harperdb-studio/resources.md deleted file mode 100644 index 528f4e11..00000000 --- a/versioned_docs/version-4.1/harperdb-studio/resources.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Resources (Marketplace, Drivers, Tutorials, & Example Code) ---- - -# Resources (Marketplace, Drivers, Tutorials, & Example Code) - -HarperDB Studio resources are available regardless of whether or not you are logged in. - -# HarperDB Marketplace - -The [HarperDB Marketplace](https://studio.harperdb.io/resources/marketplace/active) is a collection of SDKs and connectors that enable developers to expand upon HarperDB for quick and easy solution development. Extensions are built and supported by the HarperDB Community. Each extension is hosted on the appropriate package manager or host. - -To download a Marketplace extension: - -1. Navigate to the [HarperDB Marketplace](https://studio.harperdb.io/resources/marketplace/active) page. -2. Identity the extension you would like to use. -3. Either click the link to the package. -4. Follow the extension’s instructions to proceed. - -You can submit your rating for each extension by clicking on the stars. - -## HarperDB Drivers - -HarperDB offers standard drivers to connect real-time HarperDB data with BI, analytics, reporting and data visualization technologies. Drivers are built and maintained by [CData Software](https://www.cdata.com/drivers/harperdb/). - -To download a driver: - -1. Navigate to the [HarperDB Drivers](https://studio.harperdb.io/resources/marketplace/active) page. -2. Identity the driver you would like to use. -3. 
Click the download link. -4. For additional instructions, visit the support link on the driver card. - -## Video Tutorials - -HarperDB offers video tutorials available in the Studio on the [HarperDB Tutorials](https://studio.harperdb.io/resources/tutorials/UExsZ1RNVEtzeXBTNUdJbjRZaTNOeEM0aW5YX3RBNU85SS4yODlGNEE0NkRGMEEzMEQy) page as well as our [YouTube channel](https://www.youtube.com/playlist?list=PLlgTMTKsypS5GIn4Yi3NxC4inX_tA5O9I). The HarperDB Studio is changing all the time, as a result these, the videos may not include all of the current Studio features. - -## Example Code - -The [code examples](https://studio.harperdb.io/resources/examples/QuickStart%20Examples/Create%20dev%20Schema) page offers example code for many different programming languages. These samples will include a placeholder for your authorization token. Full code examples with the authorization token prepopulated are available within individual instance pages. diff --git a/versioned_docs/version-4.1/index.md b/versioned_docs/version-4.1/index.md deleted file mode 100644 index 74e5b891..00000000 --- a/versioned_docs/version-4.1/index.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Documentation ---- - -# Documentation - -HarperDB's documentation covers installation, getting started, APIs, security, and much more. Browse the topics at left, or choose one of the commonly used documentation sections below. 
- ---- - -- [Install HarperDB Locally](./4.1/install-harperdb) -- [Getting Started](./4.1/getting-started) -- [HarperDB Operations API](https://api.harperdb.io) -- [HarperDB Studio](./4.1/harperdb-studio) -- [HarperDB Cloud](./4.1/harperdb-cloud) -- [Developer Project Examples](https://github.com/search?q=harperdb) -- [Support](./4.1/support) diff --git a/versioned_docs/version-4.1/install-harperdb/index.md b/versioned_docs/version-4.1/install-harperdb/index.md deleted file mode 100644 index 8dfe4efc..00000000 --- a/versioned_docs/version-4.1/install-harperdb/index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Install HarperDB ---- - -# Install HarperDB - -This documentation contains information for installing HarperDB locally. Note that if you’d like to get up and running quickly, you can try a [managed instance with HarperDB Cloud](https://studio.harperdb.io/sign-up). HarperDB is a cross-platform database; we recommend Linux for production use, but HarperDB can run on Windows and Mac as well, for development purposes. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. - -HarperDB runs on Node.js, so if you do not have it installed, you need to do that first (if you have installed, you can skip to installing HarperDB, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm), but generally NVM can be installed with: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. 
We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always uses latest minor/patch for the major version): - -```bash -nvm install 18 -``` - -### Install and Start HarperDB - -Then you can install HarperDB with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -HarperDB will automatically start after installation. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, configure your operating system for use a database server in our linux installation guide](install-harperdb/linux). - -# With Docker - -If you would like to run HarperDB in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a HarperDB container. - -# Offline Install - -If you need to install HarperDB on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -For more information visit the [HarperDB Command Line Interface](../4.1/harperdb-cli) guide. - -# Installation on Less Common Platforms - -HarperDB comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.1/install-harperdb/linux.md b/versioned_docs/version-4.1/install-harperdb/linux.md deleted file mode 100644 index 1b65b515..00000000 --- a/versioned_docs/version-4.1/install-harperdb/linux.md +++ /dev/null @@ -1,211 +0,0 @@ ---- -title: Linux Installation and Configuration ---- - -# Linux Installation and Configuration - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install HarperDB. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to HarperDB with sudo privileges exists -1. An additional volume for storing HarperDB files is attached to the Linux instance -1. Traffic to ports 9925 (HarperDB Operations API,) 9926 (HarperDB Custom Functions,) and 9932 (HarperDB Clustering) is permitted - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. - ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. 
- -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start HarperDB - -Here is an example of installing HarperDB with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing HarperDB with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --OPERATIONSAPI_NETWORK_HTTPS "true" \ - --CUSTOMFUNCTIONS_NETWORK_HTTPS "true" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -HarperDB will automatically start after installation. If you wish HarperDB to start when the OS boots, you have two options - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=HarperDB - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [HarperDB Command Line Interface guide](../harperdb-cli) and the [HarperDB Configuration File guide](../configuration). diff --git a/versioned_docs/version-4.1/jobs.md b/versioned_docs/version-4.1/jobs.md deleted file mode 100644 index 98c901fe..00000000 --- a/versioned_docs/version-4.1/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Asynchronous Jobs ---- - -# Asynchronous Jobs - -HarperDB Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. 
For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](https://api.harperdb.io/#0186bc25-b9ae-44e7-bd9e-8edc0f289aa2) - -[csv file load](https://api.harperdb.io/#c4b71011-8a1d-4cb2-8678-31c0363fea5e) - -[csv url load](https://api.harperdb.io/#d1e9f433-e250-49db-b44d-9ce2dcd92d32) - -[import from s3](https://api.harperdb.io/#820b3947-acbe-41f9-858b-2413cabc3a18) - -[delete_records_before](https://api.harperdb.io/#8de87e47-73a8-4298-b858-ca75dc5765c2) - -[export_local](https://api.harperdb.io/#49a02517-ada9-4198-b48d-8707db905be0) - -[export_to_s3](https://api.harperdb.io/#f6393e9f-e272-4180-a42c-ff029d93ddd4) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the id of that job which can be used to check on its status. - -## Managing Jobs - -To check on a job's status, use the [get_job](https://api.harperdb.io/#d501bef7-dbb7-4714-b535-e466f6583dce) operation. 
- -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the id is not known) use the [search_jobs_by_start_date](https://api.harperdb.io/#4474ca16-e4c2-4740-81b5-14ed98c5eeab) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.1/logging.md b/versioned_docs/version-4.1/logging.md deleted file mode 100644 index 234d5903..00000000 --- a/versioned_docs/version-4.1/logging.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: Logging ---- - -# Logging - -HarperDB maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. 
- -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the HarperDB application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and most verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/id` - This reports the name of the thread and the thread id that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default; to do this, the default log level is `error`. If you require more information from the logs, lowering the log level will provide that. 
- -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -HarperDB clustering utilizes two [Nats](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of HarperDB -and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate -log level configuration for them. To adjust their log verbosity set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are -`error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -HarperDB logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. -To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](./configuration). - -## Read Logs via the API - -To access specific logs you may query the HarperDB API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. 
- -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.1/reference/content-types.md b/versioned_docs/version-4.1/reference/content-types.md deleted file mode 100644 index 06d59559..00000000 --- a/versioned_docs/version-4.1/reference/content-types.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: HarperDB Supported Content Types ---- - -# HarperDB Supported Content Types - -HarperDB supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. HarperDB follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard HarperDB operations. - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by HarperDB, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with HarperDB. CBOR supports the full range of HarperDB data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and HarperDB's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with a support for all HarperDB data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with HarperDB's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. diff --git a/versioned_docs/version-4.1/reference/data-types.md b/versioned_docs/version-4.1/reference/data-types.md deleted file mode 100644 index 105323b4..00000000 --- a/versioned_docs/version-4.1/reference/data-types.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: HarperDB Supported Data Types ---- - -# HarperDB Supported Data Types - -HarperDB supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (HarperDB’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. As of v4.1, HarperDB supports MessagePack and CBOR, which allows for all of HarperDB supported data types. This includes: - -## Boolean - -true or false. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. - -## Number - -Numbers can be stored as signed integers up to 64-bit or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. JSON is parsed by JS, so the maximum safe (precise) integer is 9007199254740991 (larger numbers can be stored, but aren’t guaranteed integer precision). Custom Functions may use BigInt numbers to store/access larger 64-bit integers, but integers beyond 64-bit can’t be stored with integer precision (will be stored as standard double-precision numbers). - -## Object/Map - -Objects, or maps, that hold a set named properties can be stored in HarperDB. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in HarperDB’s storage. 
Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in HarperDB. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in HarperDB property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. - -## Binary Data - -Binary data can be stored in property values as well. JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in HarperDB as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/versioned_docs/version-4.1/reference/dynamic-schema.md b/versioned_docs/version-4.1/reference/dynamic-schema.md deleted file mode 100644 index 7b1a7e06..00000000 --- a/versioned_docs/version-4.1/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -HarperDB is built to make data ingestion simple. A primary driver of that is the Dynamic Schema. The purpose of this document is to provide a detailed explanation of the dynamic schema specifically related to schema definition and data ingestion. - -The dynamic schema provides the structure of schema and table namespaces while simultaneously providing the flexibility of a data-defined schema. Individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. 
HarperDB tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Schemas - -HarperDB schemas are analogous to a namespace that groups tables together. A schema is required to create a table. - -### Tables - -HarperDB tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in HarperDB. - -Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [HarperDB Storage Algorithm](./storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -Additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to HarperDB. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. HarperDB offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. - -**Audit Attributes** - -HarperDB automatically creates two audit attributes used on each record. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. 
-- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [HarperDB API operations](https://api.harperdb.io/). - -**Create a Schema** - -```bash -{ - "operation": "create_schema", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and hash attribute name are the only required parameters. - -```bash -{ - "operation": "create_table", - "schema": "dev", - "table": "dog", - "hash_attribute": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.1/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "schema": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.1/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. - -```bash -{ - "operation": "insert", - "schema": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. 
Our table now looks like this: - -**dev.dog** - -![](/img/v4.1/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "schema": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.1/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.1/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.1/reference/headers.md b/versioned_docs/version-4.1/reference/headers.md deleted file mode 100644 index e2798c03..00000000 --- a/versioned_docs/version-4.1/reference/headers.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: HarperDB Headers ---- - -# HarperDB Headers - -All HarperDB API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all HarperDB API responses: - -| Key | Example Value | Description | -| ----------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| hdb-response-time | 7.165 | This is the legacy header for reporting response time. It is deprecated and will be removed in 4.2. 
| -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.1/reference/index.md b/versioned_docs/version-4.1/reference/index.md deleted file mode 100644 index 762a0831..00000000 --- a/versioned_docs/version-4.1/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for HarperDB. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.1/reference/limits.md b/versioned_docs/version-4.1/reference/limits.md deleted file mode 100644 index fbcbc1cd..00000000 --- a/versioned_docs/version-4.1/reference/limits.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: HarperDB Limits ---- - -# HarperDB Limits - -This document outlines limitations of HarperDB. - -## Schema Naming Restrictions - -**Case Sensitivity** - -HarperDB schema metadata (schema names, table names, and attribute/column names) are case sensitive. Meaning schemas, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Schema Metadata Names** - -HarperDB schema metadata (schema names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -HarperDB limits number of attributes to 10,000 per table. 
diff --git a/versioned_docs/version-4.1/reference/storage-algorithm.md b/versioned_docs/version-4.1/reference/storage-algorithm.md deleted file mode 100644 index 90a880d4..00000000 --- a/versioned_docs/version-4.1/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The HarperDB storage algorithm is fundamental to the HarperDB core functionality, enabling the [Dynamic Schema](./dynamic-schema) and all other user-facing functionality. HarperDB is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within HarperDB. - -## Query Language Agnostic - -The HarperDB storage algorithm was designed to abstract the data storage from any individual query language. HarperDB currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, HarperDB offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. Each HarperDB table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. HarperDB tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [HarperDB Dynamic Schema](./dynamic-schema) reflexively creates both the attribute and index reflexively as new schema metadata comes in. 
Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -HarperDB inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## HarperDB Indexing Example (Single Table) - -![](/img/v4.1/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.1/security/basic-auth.md b/versioned_docs/version-4.1/security/basic-auth.md deleted file mode 100644 index d128471a..00000000 --- a/versioned_docs/version-4.1/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Authentication ---- - -# Authentication - -HarperDB uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a user name and password when making a request. - -** \***You do not need to log in separately. Basic Auth is added to each HTTP request like create_schema, create_table, insert etc… via headers.**\* ** - -A header is added to each HTTP request. The header key is **"Authorization"** the header value is **"Basic <<your username and password buffer token>>"** - -## Authentication in HarperDB Studio - -In the below code sample, you can see where we add the authorization header to the request. 
This needs to be added for each and every HTTP request for HarperDB. - -_Note: This function uses btoa. Learn about [btoa here](https://developer.mozilla.org/en-US/docs/Web/API/btoa)._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.1/security/certificate-management.md b/versioned_docs/version-4.1/security/certificate-management.md deleted file mode 100644 index f3324d8a..00000000 --- a/versioned_docs/version-4.1/security/certificate-management.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for the Operations API and the Custom Functions API. For information on certificate managment for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of HarperDB does not have HTTPS enabled for the Operations API or the Custom Functions API (see [configuration](../configuration) for relevant configuration file settings.) This is great for local development. 
If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. - -To enable HTTPS, set the `operationsApi.network.https` and `customFunctions.network.https` to `true` and restart HarperDB. - -By default HarperDB will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your HarperDB node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your HarperDB node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable HarperDB HTTPS and Replace Certificates - -To enable HTTPS, set the `operationsApi.network.https` and `customFunctions.network.https` to `true` and restart HarperDB. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the HarperDB configuration with the path of your new certificate files, and then restart HarperDB. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -```yaml -customFunctions: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for HarperDB, Nginx can be used as a reverse proxy for HarperDB. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to HarperDB as HTTP requests. 
- -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for HarperDB, a number of different external services can be used as a reverse proxy for HarperDB. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to HarperDB as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for HarperDB administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.1/security/configuration.md b/versioned_docs/version-4.1/security/configuration.md deleted file mode 100644 index 12ae0c1d..00000000 --- a/versioned_docs/version-4.1/security/configuration.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -HarperDB was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with HarperDB. - -## CORS - -HarperDB allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, HarperDB enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file this can be found in <ROOTPATH>, the location you specified during install. - -2. 
In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 2. To turn on, change to: `cors: true` - - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 2. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harperdb.io,https://products.harperdb.io` - - 3. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -HarperDB provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose HarperDB's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -HarperDB automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. 
Use the `harperdb restart` operation from the HarperDB Operations API.** diff --git a/versioned_docs/version-4.1/security/index.md b/versioned_docs/version-4.1/security/index.md deleted file mode 100644 index 59ce5b39..00000000 --- a/versioned_docs/version-4.1/security/index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Security ---- - -# Security - -HarperDB uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [Configuration](configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.1/security/jwt-auth.md b/versioned_docs/version-4.1/security/jwt-auth.md deleted file mode 100644 index 978ca4b4..00000000 --- a/versioned_docs/version-4.1/security/jwt-auth.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -HarperDB uses token-based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations: `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all HarperDB operations in the Bearer Token Authorization Header. The default expiry is one day. - -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. - -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. 
- -## Create Authentication Tokens - -Users must initially create tokens using their HarperDB credentials. The following POST body is sent to HarperDB. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). - -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.1/security/users-and-roles.md b/versioned_docs/version-4.1/security/users-and-roles.md deleted file mode 100644 index 586d5e11..00000000 --- a/versioned_docs/version-4.1/security/users-and-roles.md +++ /dev/null @@ -1,269 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -HarperDB utilizes a Role-Based Access Control (RBAC) framework to manage access to HarperDB instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. 
- -## Roles in HarperDB - -Role permissions in HarperDB are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a HarperDB instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. HarperDB will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ - -2. At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing schemas, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within HarperDB. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a HarperDB instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. - -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. - -- `structure_user` - This role provides specific access for creation and deletion of data. 
- - When defining this role type you can either assign a value of true, which will allow the role to create and drop schemas & tables. Alternatively the role type can be assigned a string array. The values in this array are schemas and allow the role to only create and drop tables in the designated schemas. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. - -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. - -- Describe operations will return metadata for all schemas, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a HarperDB instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role`, `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. 
- -Example JSON for `add_role` request - -```json -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "schema_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true - } - ] - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [] - } - } - } - } -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional schema-specific permissions values included since the role will have access to the entire database schema. If permissions are included in the body of the operation, they will stored within HarperDB, but ignored, as super_users have full access to the database._ - -- `permissions`: Schema tables that a role should have specific CRUD access to should be included in the final, schema-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles, blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its schema in the roles permissions JSON passed to the API (_see example above_). 
- -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a schema and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the schema and/or tables. - -2. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. - -2. If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute`, even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -3. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have not CRUD access (with the exception of the `hash_attribute` described in #2). 
- - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -4. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -5. The `__createdtime__` and `__updatedtime__` attributes that HarperDB manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. - -6. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. - -## Role-Based Operation Restrictions - -The table below includes all API operations available in HarperDB and indicates whether or not the operation is restricted to super_user roles. 
- -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the schema-level CRUD permissions set for the roles._ - -| Schemas and Tables | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| describe_all | | -| describe_schema | | -| describe_table | | -| create_schema | X | -| drop_schema | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | :-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X 
| -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed HarperDB as `<>`. Because HarperDB stores files natively on the operating system, we only allow the HarperDB executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. User_b may not have access to the hdb files HarperDB needs. This also keeps HarperDB more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. 
diff --git a/versioned_docs/version-4.1/sql-guide/date-functions.md b/versioned_docs/version-4.1/sql-guide/date-functions.md deleted file mode 100644 index 535ac7b6..00000000 --- a/versioned_docs/version-4.1/sql-guide/date-functions.md +++ /dev/null @@ -1,223 +0,0 @@ ---- -title: SQL Date Functions ---- - -# SQL Date Functions - -HarperDB utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string) if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. - -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. 
- -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. - -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). 
- -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. 
The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.1/sql-guide/delete.md b/versioned_docs/version-4.1/sql-guide/delete.md deleted file mode 100644 index 0ddbc6fd..00000000 --- a/versioned_docs/version-4.1/sql-guide/delete.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Delete ---- - -# Delete - -HarperDB supports deleting records from a table with condition support. - -``` -DELETE FROM dev.dog - WHERE age < 4 -``` diff --git a/versioned_docs/version-4.1/sql-guide/features-matrix.md b/versioned_docs/version-4.1/sql-guide/features-matrix.md deleted file mode 100644 index 85b9257a..00000000 --- a/versioned_docs/version-4.1/sql-guide/features-matrix.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -HarperDB provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. If not, feel free to [add a Feature Request](https://feedback.harperdb.io/). 
- -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.1/sql-guide/functions.md b/versioned_docs/version-4.1/sql-guide/functions.md deleted file mode 100644 index 8d161679..00000000 --- a/versioned_docs/version-4.1/sql-guide/functions.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -title: HarperDB SQL Functions ---- - -# HarperDB SQL Functions - -This SQL keywords reference contains the SQL functions available in HarperDB. 
- -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM schema.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. | -| `MAX` | `SELECT MAX(column_name) FROM schema.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM schema.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). - -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. 
| - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. | -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. 
| -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. | -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. | -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. 
| - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. | - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. 
| - -### String - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM schema.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM schema.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. 
| - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | --------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM schema.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM schema.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM schema.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | -------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM schema.table` | Returns only unique values, eliminating duplicate records. | -| `FROM` | `FROM schema.table` | Used to list the schema(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM schema.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM schema.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM schema.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM schema.table WHERE condition` | Extracts records based on a defined condition. 
| - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM schema.table_1 CROSS JOIN schema.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM schema.table_1 FULL OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM schema.table_1 INNER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. | -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM schema.table_1 LEFT OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM schema.table_1 RIGHT OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. 
| - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ----------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM schema.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM schema.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | -------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM schema.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO schema.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE schema.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. | diff --git a/versioned_docs/version-4.1/sql-guide/index.md b/versioned_docs/version-4.1/sql-guide/index.md deleted file mode 100644 index 554668e8..00000000 --- a/versioned_docs/version-4.1/sql-guide/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: HarperDB SQL Guide ---- - -# HarperDB SQL Guide - -The purpose of this guide is to describe the available functionality of HarperDB as it relates to supported SQL functionality. The SQL parser is still actively being developed and this document will be updated as more features and functionality becomes available. **A high-level view of supported features can be found [here](sql-guide/features-matrix).** - -HarperDB adheres to the concept of schemas & tables. This allows developers to isolate table structures from each other all within one database. 
diff --git a/versioned_docs/version-4.1/sql-guide/insert.md b/versioned_docs/version-4.1/sql-guide/insert.md deleted file mode 100644 index 56ff1f3e..00000000 --- a/versioned_docs/version-4.1/sql-guide/insert.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Insert ---- - -# Insert - -HarperDB supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. HarperDB does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` diff --git a/versioned_docs/version-4.1/sql-guide/joins.md b/versioned_docs/version-4.1/sql-guide/joins.md deleted file mode 100644 index e274e048..00000000 --- a/versioned_docs/version-4.1/sql-guide/joins.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Joins ---- - -# Joins - -HarperDB allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.1/sql-guide/json-search.md b/versioned_docs/version-4.1/sql-guide/json-search.md deleted file mode 100644 index b6c78eb2..00000000 --- a/versioned_docs/version-4.1/sql-guide/json-search.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: SQL JSON Search ---- - -# SQL JSON Search - -HarperDB automatically indexes all top level attributes in a row / object written to a table. However, any attributes which holds JSON does not have its nested attributes indexed. 
In order to make searching and/or transforming these JSON documents easy, HarperDB offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. - -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -##### Sample Result - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. 
- -To see further SEARCH_JSON examples in action view our Postman Collection that provides a sample schema & data with query examples: [https://api.harperdb.io/](https://api.harperdb.io/) - -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.1/sql-guide/reserved-word.md b/versioned_docs/version-4.1/sql-guide/reserved-word.md deleted file mode 100644 index 7add41ff..00000000 --- a/versioned_docs/version-4.1/sql-guide/reserved-word.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -title: HarperDB SQL Reserved Words ---- - -# HarperDB SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a schema, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. - -For Example, for a table called ASSERT in the dev schema, a SQL select on that table would look like: - -``` -SELECT * from dev.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from dev.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER 
-- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git a/versioned_docs/version-4.1/sql-guide/select.md b/versioned_docs/version-4.1/sql-guide/select.md deleted file mode 100644 index 353f005a..00000000 --- a/versioned_docs/version-4.1/sql-guide/select.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Select ---- - -# Select - -HarperDB has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. - -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -\*The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword. 
diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoarea.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoarea.md deleted file mode 100644 index 0cb3824f..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoarea.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: geoArea ---- - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. | - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocontains.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocontains.md deleted file mode 100644 index 778d9f0f..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocontains.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: geoContains ---- - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. - -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). 
- -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain HarperDB Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoconvert.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoconvert.md deleted file mode 100644 index 6e22fdda..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoconvert.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: geoConvert ---- - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. 
| - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "HarperDB Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocrosses.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocrosses.md deleted file mode 100644 index 1ab1f2b3..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geocrosses.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: geoCrosses ---- - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodifference.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodifference.md deleted file mode 100644 index c8a02afc..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodifference.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: geoDifference ---- - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. 
- -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodistance.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodistance.md deleted file mode 100644 index a2515eb9..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geodistance.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Geodistance ---- - -#geoDistance -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. 
GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between HarperDB’s headquarters and the Washington Monument. - -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoequal.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoequal.md deleted file mode 100644 index 87ce3d5a..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geoequal.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: geoEqual ---- - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find HarperDB Headquarters within all locations within the database. 
- -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geolength.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geolength.md deleted file mode 100644 index 836274ab..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geolength.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -title: geoLength ---- - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. 
- -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geonear.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geonear.md deleted file mode 100644 index 3dca7590..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/geonear.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: geoNear ---- - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. 
- -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` diff --git a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/index.md b/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/index.md deleted file mode 100644 index 4f70f117..00000000 --- a/versioned_docs/version-4.1/sql-guide/sql-geospatial-functions/index.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -# SQL Geospatial Functions - -HarperDB geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -2. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -3. Note if you are using Postman for you testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, schema and table names may change, but all GeoJSON data will be stored in a column named geo_data. diff --git a/versioned_docs/version-4.1/sql-guide/update.md b/versioned_docs/version-4.1/sql-guide/update.md deleted file mode 100644 index 054a0ead..00000000 --- a/versioned_docs/version-4.1/sql-guide/update.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Update ---- - -# Update - -HarperDB supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. 
At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` diff --git a/versioned_docs/version-4.1/support.md b/versioned_docs/version-4.1/support.md deleted file mode 100644 index 7b37394d..00000000 --- a/versioned_docs/version-4.1/support.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Support ---- - -# Support - -HarperDB support is available with all paid instances. Support tickets are managed via our [Zendesk portal](https://harperdbhelp.zendesk.com/hc/en-us/requests/new). Once a ticket is submitted the HarperDB team will triage your request and get back to you as soon as possible. Additionally, you can join our [Slack community](https://harperdbcommunity.slack.com/join/shared_invite/zt-e8w6u1pu-2UFAXl_f4ZHo7F7DVkHIDA#/) where HarperDB team members and others in the community are frequently active to help answer questions. - -- [Submit a Support Ticket](https://harperdbhelp.zendesk.com/hc/en-us/requests/new) -- [Join Our Slack Community](https://harperdbcommunity.slack.com/join/shared_invite/zt-e8w6u1pu-2UFAXl_f4ZHo7F7DVkHIDA#/) - ---- - -### Common Issues - -**1 Gigabyte Limit to Request Bodies** - -HarperDB supports the body of a request to be up to 1 GB in size. This limit does not impact the CSV file import function the reads from the local file system or from an external URL. We recommend if you do need to bulk import large record sets that you utilize the CSV import function, especially if you run up on the 1 GB body size limit. Documentation for these functions can be found here. - -**Do not install as sudo** - -HarperDB should be installed using a specific user for HarperDB. This allows you to restrict the permissions that user has and who has access to the HarperDB file system. 
The reason behind this is that HarperDB files are written directly to the file system, and by using a specific HarperDB user this gives you granular control over who has access to these files. - -**Error: Must execute as User** - -You may have gotten an error like, `Error: Must execute as <>.` This means that you installed HarperDB as `<>`. Because HarperDB stores files directly to the file system, we only allow the HarperDB executable to be run by a single user. This prevents permissions issues on files. For example if you installed as user_a, but later wanted to run as user_b. User_b may not have access to the database files HarperDB needs. This also keeps HarperDB more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. - ---- - -### Frequently Asked Questions (FAQs) - -**What operating system should I use to run HarperDB?** - -All major operating systems: Linux, Windows, and macOS. However, running HarperDB on Windows and macOS is intended only for development and evaluation purposes. Linux is strongly recommended for production use. - -**How are HarperDB’s SQL and NoSQL capabilities different from other solutions?** - -Many solutions offer NoSQL capability and separate processing for SQL such as in-memory transformation or multi-model support. HarperDB’s unique mechanism for storing each data attribute individually allows for performing NoSQL and SQL operations in real-time on the stored data set. - -**How does HarperDB ensure high availability and consistency?** - -HarperDB's clustering and replication capabilities allow high availability and fault-tolerance; if a server goes down, traffic can be quickly routed to other HarperDB servers that can service requests. HarperDB's replication uses a consistent resolution strategy (last-write-wins by logical timestamp), to ensure eventual consistency. 
HarperDB offers auditing capabilities that can be enabled to preserve a record of all changes so that mistakes or even malicious data changes are recorded and can be reverted. - -**Is HarperDB ACID-compliant?** - -HarperDB operations are atomic, consist, and isolated per instance. This means that any query will provide an isolated consistent snapshot view of the database (based on when the query started. Updating and insertion operations are also performed atomically; any reads and writes are performed within an atomic, isolated transaction with serialization isolation level, and will rollback if it can not be fully completed successfully. Data is immediately flushed to disk after a write to ensure eventual durability. ACID compliance is not guaranteed across instances in a cluster, rather the eventual consistency will propagate changes with last-write-wins (by last logical timestamp) resolution. - -**How Does HarperDB Secure My Data?** - -HarperDB has role and user based security allowing you to simply and easily control that the right people have access to your data. We also implement a number of authentication mechanisms to ensure the transactions submitted are trusted and secure. - -**Is HarperDB row or column oriented?** - -HarperDB can be considered column oriented, however, the exploded data model creates an interface that is free from either of these orientations. A user can search and update with columnar benefits and be as ACID as row oriented restrictions. - -**What do you mean when you say HarperDB is single model?** - -HarperDB takes every attribute of a database table object and creates a key:value for both the key and its corresponding value. For example, the attribute eye color will be represented by a key "eye-color" and the corresponding value "green" will be represented by a key with the value "green". 
We use LMDB’s lightning-fast key:value store to underpin all these interrelated keys and values, meaning that every "column" is automatically indexed, and you get huge performance in a tiny package. - -**Are Primary Keys Case-Sensitive?** - -When using HarperDB, primary keys are case-sensitive. This can cause confusion for developers. For example, if you have a user table, it might make sense to use `user.email` as the primary key. This can cause problems as Harper@harperdb.io and harper@harperdb.io would be seen as two different records. We recommend enforcing case on keys within your app to avoid this issue. - -**How Do I Move My HarperDB Data Directory?** - -HarperDB’s data directory can be moved from one location to another by simply updating the `rootPath` in the config file (where the data lives, which you specified during installation) to a new location. - -Next, edit HarperDB’s hdb_boot_properties.file to point HarperDB to the new location by updating the settings_path variable. Substitute the NEW_HDB_ROOT variable in the snippets below with the new path to your new data directory, making sure you escape any slashes. - -On MacOS/OSX - -```bash -sed -i '' -E 's/^(settings_path[[:blank:]]*=[[:blank:]]*).*/\1NEW_HDB_ROOT\/harperdb-config.yaml/' ~/.harperdb/hdb_boot_properties.file -``` - -On Linux - -```bash -sed -i -E 's/^(settings_path[[:blank:]]*=[[:blank:]]*).*/\1NEW_HDB_ROOT\/harperdb-config.yaml/' ~/hdb_boot_properties.file -``` - -Finally, edit the config file in the root folder you just moved: - -- Edit the `rootPath` parameter to reflect the new location of your data directory. 
diff --git a/versioned_docs/version-4.1/transaction-logging.md b/versioned_docs/version-4.1/transaction-logging.md deleted file mode 100644 index 2f4ee9d6..00000000 --- a/versioned_docs/version-4.1/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -HarperDB offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. HarperDB leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. - -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](./clustering/). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. 
- -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. diff --git a/versioned_docs/version-4.1/upgrade-hdb-instance.md b/versioned_docs/version-4.1/upgrade-hdb-instance.md deleted file mode 100644 index 91808fa0..00000000 --- a/versioned_docs/version-4.1/upgrade-hdb-instance.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: Upgrade a HarperDB Instance ---- - -# Upgrade a HarperDB Instance - -This document describes best practices for upgrading self-hosted HarperDB instances. HarperDB can be upgraded using a combination of npm and built-in HarperDB upgrade scripts. Whenever upgrading your HarperDB installation it is recommended you make a backup of your data first. 
Note: This document applies to self-hosted HarperDB instances only. All HarperDB Cloud instances will be upgraded by the HarperDB Cloud team. - -## Upgrading - -Upgrading HarperDB is a two-step process. First the latest version of HarperDB must be downloaded from npm, then the HarperDB upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of HarperDB using `npm install -g harperdb`. - - Note `-g` should only be used if you installed HarperDB globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - HarperDB will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). - -HarperDB supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading HarperDB, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that HarperDB is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure HarperDB is not running: - -```bash -harperdb stop -``` - -Uninstall HarperDB. 
Note, this step is not required, but will clean up old artifacts of HarperDB. We recommend removing all other HarperDB installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install HarperDB globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start HarperDB - -```bash -harperdb start -``` diff --git a/versioned_docs/version-4.2/administration/_category_.json b/versioned_docs/version-4.2/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.2/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.2/administration/administration.md b/versioned_docs/version-4.2/administration/administration.md deleted file mode 100644 index 42213dcf..00000000 --- a/versioned_docs/version-4.2/administration/administration.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -HarperDB is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own HarperDB servers. - -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. 
But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database HarperDB is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. HarperDB provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](administration/logging): HarperDB defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). HarperDB has a [`get_backup`](../developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. HarperDB can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). - -### Horizontal Scaling with Node Cloning - -HarperDB provides rapid horizontal scaling capabilities through [node cloning functionality described here](administration/cloning). - -### Replication Transaction Logging - -HarperDB utilizes NATS for replication, which maintains a transaction log. 
See the [transaction log documentation for information on how to query this log](administration/logging). diff --git a/versioned_docs/version-4.2/administration/cloning.md b/versioned_docs/version-4.2/administration/cloning.md deleted file mode 100644 index 2619cd7c..00000000 --- a/versioned_docs/version-4.2/administration/cloning.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that can be pointed to another instance of HarperDB and create a full clone. - -To start clone node install `harperdb` as you would normally but have the clone node environment or command line (CLI) variables set (see below). - -To run clone node either of the following variables must be set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `HDB_LEADER_CLUSTERING_HOST` - _(optional)_ The leader clustering host. This value will be added to the clustering routes on the clone node. If this value is not set, replication will not be setup between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 HDB_LEADER_CLUSTERING_HOST=node-1.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--HDB_LEADER_CLUSTERING_HOST` - _(optional)_ The leader clustering host. This value will be added to the clustering routes on the clone node. If this value is not set, replication will not be setup between the leader and clone. 
- -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --HDB_LEADER_CLUSTERING_HOST node-1.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -If an instance already exists in the location you are cloning to, clone node will not run. It will instead proceed with starting HarperDB. -This is unless you are cloning overtop (see below) of an existing instance. - -Clone node does not require any additional configuration apart from the variables referenced above. -However, it can be configured through `clone-node-config.yaml`, which should be located in the `ROOTPATH` directory of your clone. -If no configuration is supplied, default values will be used. - -By default: - -- The HarperDB Terms and Conditions will be accepted -- The Root path will be ``/hdb -- The Operations API port will be set to 9925 -- The admin and clustering username and password will be the same as the leader node -- A unique node name will be generated -- All tables will be cloned and have replication added, the subscriptions will be `publish: true` and `subscribe: true` -- The users and roles system tables will be cloned and have replication added both ways -- All components will be cloned -- All routes will be cloned - -**Leader node** - the instance of HarperDB you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. - -The following configuration is used exclusively by clone node. - -```yaml -databaseConfig: - excludeDatabases: - - database: dev - excludeTables: - - database: prod - table: dog -``` - -Set any databases or tables that you wish to exclude from cloning. - -```yaml -componentConfig: - skipNodeModules: true - exclude: - - name: my-cool-component -``` - -`skipNodeModules` will not include the node_modules directory when clone node is packaging components in `hdb/components`. - -`exclude` can be used to set any components that you do not want cloned. 
- -```yaml -clusteringConfig: - publishToLeaderNode: true - subscribeToLeaderNode: true -``` - -`publishToLeaderNode`, `subscribeToLeaderNode` the clustering subscription to set up with the leader node. - -```yaml -httpsRejectUnauthorized: false -``` - -Clone node makes http requests to the leader node, `httpsRejectUnauthorized` is used to set if https requests should be verified. - -Any HarperDB configuration can also be used in the `clone-node-config.yaml` file and will be applied to the cloned node, for example: - -```yaml -rootPath: null -operationsApi: - network: - port: 9925 -clustering: - nodeName: null - logLevel: info -logging: - level: error -``` - -_Note: any required configuration needed to install/run HarperDB will be default values or auto-generated unless it is provided in the config file._ - -### Fully connected clone - -A fully connected topology is when all nodes are replicating (publish and subscribing) with all other nodes. A fully connected clone maintains this topology with addition of the new node. When a clone is created, replication is added between the leader and the clone and any nodes the leader is replicating with. For example, if the leader is replicating with node-a and node-b, the clone will replicate with the leader, node-a and node-b. - -To run clone node with the fully connected option simply pass the environment variable `HDB_FULLY_CONNECTED=true` or CLI variable `--HDB_FULLY_CONNECTED true`. - -### Cloning overtop of an existing HarperDB instance - -_Note: this will completely overwrite any system tables (user, roles, nodes, etc.) and any other databases that are named the same as ones that exist on the leader node. It will also do the same for any components._ - -To create a clone over an existing install of HarperDB use the environment `HDB_CLONE_OVERTOP=true` or CLI variable `--HDB_CLONE_OVERTOP true`. - -## Cloning steps - -When run clone node will execute the following steps: - -1. 
Clone any user defined tables and the hdb_role and hdb_user system tables. -1. Install HarperDB overtop of the cloned tables. -1. Clone the configuration, this includes: - - Copy the clustering routes and clustering user. - - Copy component references. - - Using any provided clone config to populate the new clone node harperdb-config.yaml -1. Clone any components in the `hdb/components` directory. -1. Start the cloned HarperDB Instance. -1. Cluster all cloned tables. - -## Custom database and table pathing - -Currently, clone node will not clone a table if it has custom pathing configured. In this situation the full database that the table is located in will not be cloned. - -If a database has custom pathing (no individual table pathing) it will be cloned, however if no custom pathing is provided in the clone config the database will be stored in the default database directory. - -To provide custom pathing for a database in the clone config follow this configuration: - -```yaml -databases: - <database_name>: - path: /Users/harper/hdb -``` - -`<database_name>` the name of the database which will be located at the custom path.\ -`path` the path where the database will reside. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/create-account.md b/versioned_docs/version-4.2/administration/harperdb-studio/create-account.md deleted file mode 100644 index 3d146bb6..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [HarperDB Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your HarperDB Cloud Instances. 
For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -2. Review the Privacy Policy and Terms of Service. -3. Click the sign up for free button. -4. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -5. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/enable-mixed-content.md b/versioned_docs/version-4.2/administration/harperdb-studio/enable-mixed-content.md deleted file mode 100644 index 5a198b91..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the HarperDB Studio to HarperDB Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). 
diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/index.md b/versioned_docs/version-4.2/administration/harperdb-studio/index.md deleted file mode 100644 index d3cdbaeb..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: HarperDB Studio ---- - -# HarperDB Studio - -HarperDB Studio is the web-based GUI for HarperDB. Studio enables you to administer, navigate, and monitor all of your HarperDB instances in a simple, user friendly interface without any knowledge of the underlying HarperDB API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - ---- - -## How does Studio Work? - -While HarperDB Studio is web based and hosted by us, all database interactions are performed on the HarperDB instance the studio is connected to. The HarperDB Studio loads in your browser, at which point you login to your HarperDB instances. Credentials are stored in your browser cache and are not transmitted back to HarperDB. All database interactions are made via the HarperDB Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -HarperDB Studio enables users to manage both HarperDB Cloud instances and privately hosted instances all from a single UI. All HarperDB instances feature identical behavior whether they are hosted by us or by you. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/instance-configuration.md b/versioned_docs/version-4.2/administration/harperdb-studio/instance-configuration.md deleted file mode 100644 index 64a802af..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/instance-configuration.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -HarperDB instance configuration can be viewed and managed directly through the HarperDB Studio. 
HarperDB Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. User-installed instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click config in the instance control bar. - -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in HarperDB database user_ - -- Created Date (HarperDB Cloud only) - -- Region (HarperDB Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (HarperDB Cloud only) - -- Disk IOPS (HarperDB Cloud only) - -## Update Instance RAM - -HarperDB Cloud instance size and user-installed instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. - -Note: For HarperDB Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. 
Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if HarperDB Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The HarperDB Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - - If you do have a credit card associated, you will be presented with the updated billing information. - - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. 
Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The HarperDB instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -2. The instance will begin deleting immediately. - -## Restart Instance - -The HarperDB Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -2. The instance will begin restarting immediately. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/instance-example-code.md b/versioned_docs/version-4.2/administration/harperdb-studio/instance-example-code.md deleted file mode 100644 index d5805510..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/instance-example-code.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Instance Example Code ---- - -# Instance Example Code - -Example code prepopulated with the instance URL and authorization token for the logged in database user can be found on the **example code** page of the HarperDB Studio. Code samples are generated based on the HarperDB API Documentation Postman collection. Code samples accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. 
Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **example code** in the instance control bar. - -5. Select the appropriate **category** from the left navigation. - -6. Select the appropriate **operation** from the left navigation. - -7. Select your desired language/variant from the **Choose Programming Language** dropdown. - -8. Copy code from the sample code panel using the copy icon. - -## Supported Languages - -Sample code uses two identifiers: **language** and **variant**. - -- **language** is the programming language that the sample code is generated in. - -- **variant** is the methodology or library used by the language to send HarperDB requests. - -The list of available language/variants are as follows: - -| Language | Variant | -| ----------- | ------------- | -| C# | RestSharp | -| cURL | cURL | -| Go | Native | -| HTTP | HTTP | -| Java | OkHttp | -| Java | Unirest | -| JavaScript | Fetch | -| JavaScript | jQuery | -| JavaScript | XHR | -| NodeJs | Axios | -| NodeJs | Native | -| NodeJs | Request | -| NodeJs | Unirest | -| Objective-C | NSURLSession | -| OCaml | Cohttp | -| PHP | cURL | -| PHP | HTTP_Request2 | -| PowerShell | RestMethod | -| Python | http.client | -| Python | Requests | -| Ruby | Net:HTTP | -| Shell | Httpie | -| Shell | wget | -| Swift | URLSession | diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/instance-metrics.md b/versioned_docs/version-4.2/administration/harperdb-studio/instance-metrics.md deleted file mode 100644 index bf95850b..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The HarperDB Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. 
Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [HarperDB logs](../logging/standard-logging), and [HarperDB Cloud alarms](../../deployments/harperdb-cloud/alarms) (if it is a cloud instance). - -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/instances.md b/versioned_docs/version-4.2/administration/harperdb-studio/instances.md deleted file mode 100644 index b12229e2..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The HarperDB Studio allows you to administer all of your HarperDB instances in one place. HarperDB currently offers the following instance types: - -- **HarperDB Cloud Instance** Managed installations of HarperDB, what we call [HarperDB Cloud](../../deployments/harperdb-cloud/). -- **5G Wavelength Instance** Managed installations of HarperDB running on the Verizon network through AWS Wavelength, what we call [5G Wavelength Instances](../../deployments/harperdb-cloud/verizon-5g-wavelength-instances). _Note, these instances are only accessible via the Verizon network._ -- **User-Installed Instance** Any HarperDB installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. HarperDB stores metadata about your instances, which enables the Studio to display these instances when you log in. 
Beyond that, all traffic is routed from your browser to the HarperDB instances using the standard [HarperDB API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. HarperDB Cloud and user-installed instances are listed together. - -## Create a New Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New HarperDB Cloud Instance + Register User-Installed Instance** card. -1. Select your desired Instance Type. -1. For a HarperDB Cloud Instance or a HarperDB 5G Wavelength Instance, click **Create HarperDB Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial HarperDB instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial HarperDB instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _HarperDB Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ [_More on instance specs_](../../deployments/harperdb-cloud/instance-size-hardware-specs)_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your HarperDB data will reside. 
Storage is provisioned based on space and IOPS._ [_More on IOPS Impact on Performance_](../../deployments/harperdb-cloud/iops-impact)_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your HarperDB Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -## Register User-Installed Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization for the instance to be created under. -3. Click the **Create New HarperDB Cloud Instance + Register User-Installed Instance** card. -4. Select **Register User-Installed Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a HarperDB super user that is already configured in your HarperDB installation._ - - 1. Enter Instance Password - - _The password of a HarperDB super user that is already configured in your HarperDB installation._ - - 1. Enter Host - - _The host to access the HarperDB instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the HarperDB instance. HarperDB defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. 
If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _HarperDB instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. The HarperDB Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **HarperDB Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **User-Installed Instance** The instance will be removed from the HarperDB Studio only. This does not uninstall HarperDB from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. 
- -## Upgrade an Instance - -HarperDB instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. - - _The username of a HarperDB user that is already configured in your HarperDB instance._ - -1. Enter the database password. - - _The password of a HarperDB user that is already configured in your HarperDB instance._ - -1. Click **Log In**. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/login-password-reset.md b/versioned_docs/version-4.2/administration/harperdb-studio/login-password-reset.md deleted file mode 100644 index 163a6dee..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your HarperDB Studio Account - -To log into your existing HarperDB Studio account: - -1. Navigate to the [HarperDB Studio](https://studio.harperdb.io/). -2. Enter your email address. -3. Enter your password. -4. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the HarperDB Studio password reset page. -2. 
Enter your email address. -3. Click **send password reset email**. -4. If the account exists, you will receive an email with a temporary password. -5. Navigate back to the HarperDB Studio login page. -6. Enter your email address. -7. Enter your temporary password. -8. Click **sign in**. -9. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -10. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password through the user interface. - -1. Navigate to the HarperDB Studio profile page. -2. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -3. Click the **Update Password** button. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/manage-charts.md b/versioned_docs/version-4.2/administration/harperdb-studio/manage-charts.md deleted file mode 100644 index b9bd2cb2..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/manage-charts.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Manage Charts ---- - -# Manage Charts - -The HarperDB Studio includes a charting feature within an instance. They are generated in real time based on your existing data and automatically refreshed every 15 seconds. Instance charts can be accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **charts** in the instance control bar. - -## Creating a New Chart - -Charts are generated based on SQL queries, therefore to build a new chart you first need to build a query. 
Instructions as follows (starting on the charts page described above): - -1. Click **query** in the instance control bar. -1. Enter the SQL query you would like to generate a chart from. - - _For example, using the dog demo data from the API Docs, we can get the average dog age per owner with the following query: `SELECT AVG(age) as avg_age, owner_name FROM dev.dog GROUP BY owner_name`._ - -1. Click **Execute**. -1. Click **create chart** at the top right of the results table. -1. Configure your chart. - 1. Choose chart type. - - _HarperDB Studio offers many standard charting options like line, bar, etc._ - - 1. Choose a data column. - - _This column will be used to plot the data point. Typically, this is the values being calculated in the `SELECT` statement. Depending on the chart type, you can select multiple data columns to display on a single chart._ - - 1. Depending on the chart type, you will need to select a grouping. - - _This could be labeled as x-axis, label, etc. This will be used to group the data, typically this is what you used in your **GROUP BY** clause._ - - 1. Enter a chart name. - - _Used for identification purposes and will be displayed at the top of the chart._ - - 1. Choose visible to all org users toggle. - - _Leaving this option off will limit chart visibility to just your HarperDB Studio user. Toggling it on will enable all users with this Organization to view this chart._ - - 1. Click **Add Chart**. - 1. The chart will now be visible on the **charts** page. - -The example query above, configured as a bar chart, results in the following chart: - -![Average Age per Owner Example](/img/v4.2/ave-age-per-owner-ex.png) - -## Downloading Charts - -HarperDB Studio charts can be downloaded in SVG, PNG, and CSV format. Instructions as follows (starting on the charts page described above): - -1. Identify the chart you would like to export. -1. Click the three bars icon. -1. Select the appropriate download option. -1. 
The Studio will generate the export and begin downloading immediately. - -## Delete a Chart - -Delete a chart as follows (starting on the charts page described above): - -1. Identify the chart you would like to delete. -1. Click the X icon. -1. Click the **confirm delete chart** button. -1. The chart will be deleted. - -Deleting a chart that is visible to all Organization users will delete it for all users. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/manage-clustering.md b/versioned_docs/version-4.2/administration/harperdb-studio/manage-clustering.md deleted file mode 100644 index 7872fc09..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/manage-clustering.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Manage Clustering ---- - -# Manage Clustering - -HarperDB instance clustering and replication can be configured directly through the HarperDB Studio. It is recommended to read through the clustering documentation first to gain a strong understanding of HarperDB clustering behavior. - -All clustering configuration is handled through the **cluster** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **cluster** in the instance control bar. - -Note, the **cluster** page will only be available to super users. - ---- - -## Initial Configuration - -HarperDB instances do not have clustering configured by default. The HarperDB Studio will walk you through the initial configuration. Upon entering the **cluster** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. - -1. 
Create a cluster user, read more about this here: Clustering Users and Roles. - - Enter username. - - Enter password. - - Click **Create Cluster User**. - -2. Click **Set Cluster Node Name**. -3. Click **Enable Instance Clustering**. - -At this point the Studio will restart your HarperDB Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you are presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside of the Studio Organization that this instance manages a connection with. - -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all schemas and tables. - ---- - -## Connect an Instance - -HarperDB Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -2. Identify the instance you would like to connect from the **unconnected instances** panel. - -3. Click the plus icon next to the appropriate instance. - -4. If configurations are correct, all schemas will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -HarperDB Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -2. 
Click the minus icon next to the appropriate instance.
- -_Note, the **functions** page will only be available to super users._ - -## Manage Projects - -On the **functions** page of the HarperDB Studio you are presented with a functions management screen with the following properties: - -- **projects** - - Displays a list of Custom Functions projects residing on this instance. - -- **/project_name/routes** - - Only displayed if there is an existing project. Displays the routes files contained within the selected project. - -- **/project_name/helpers** - - Only displayed if there is an existing project. Displays the helper files contained within the selected project. - -- **/project_name/static** - - Only displayed if there is an existing project. Displays the static file count and a link to the static files contained within the selected project. Note, static files cannot currently be deployed through the Studio and must be deployed via the [HarperDB API](https://api.harperdb.io/) or manually to the server (not applicable with HarperDB Cloud). - -- **Root File Directory** - - Displays the root file directory where the Custom Functions projects reside on this instance. - -- **Custom Functions Server URL** - - Displays the base URL in which all Custom Functions are accessed for this instance. - -## Create a Project - -HarperDB Custom Functions Projects can be initialized with the following instructions. - -1. If this is your first project, skip this step. Click the plus icon next to the **projects** heading. - -2. Enter the project name in the text box located under the **projects** heading. - -3. Click the check mark icon next the appropriate instance. - -4. The Studio will take a few moments to provision a new project based on the [Custom Functions template](https://github.com/HarperDB/harperdb-custom-functions-template). - -5. The Custom Functions project is now created and ready to modify. - -## Modify a Project - -Custom Functions routes and helper functions can be modified directly through the Studio. 
From the **functions** page: - -1. Select the appropriate **project**. - -2. Select the appropriate **route** or **helper**. - -3. Modify the code with your desired changes. - -4. Click the save icon at the bottom right of the screen. - - _Note, saving modifications will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Create Additional Routes/Helpers - -To create an additional **route** to your Custom Functions project. From the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the plus icon to the right of the **routes** header. - -3. Enter the name of the new route in the textbox that appears. - -4. Click the check icon to create the new route. - - _Note, adding a route will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To create an additional **helper** to your Custom Functions project. From the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the plus icon to the right of the **helpers** header. - -3. Enter the name of the new helper in the textbox that appears. - -4. Click the check icon to create the new helper. - - _Note, adding a helper will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Delete a Project/Route/Helper - -To delete a Custom Functions project from the **functions** page: - -1. Click the minus icon to the right of the **projects** header. - -2. Click the red minus icon to the right of the Custom Functions project you would like to delete. - -3. Confirm deletion by clicking the red check icon. 
- - _Note, deleting a project will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To delete a Custom Functions _project route_ from the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the minus icon to the right of the **routes** header. - -3. Click the red minus icon to the right of the Custom Functions route you would like to delete. - -4. Confirm deletion by clicking the red check icon. - - _Note, deleting a route will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -To delete a Custom Functions _project helper_ from the **functions** page: - -1. Select the appropriate Custom Functions **project**. - -2. Click the minus icon to the right of the **helper** header. - -3. Click the red minus icon to the right of the Custom Functions header you would like to delete. - -4. Confirm deletion by clicking the red check icon. - - _Note, deleting a header will restart the Custom Functions server on your HarperDB instance and may result in up to 60 seconds of downtime for all Custom Functions._ - -## Deploy Custom Functions Project to Other Instances - -The HarperDB Studio provides the ability to deploy Custom Functions projects to additional HarperDB instances within the same Studio Organization. To deploy Custom Functions projects to additional instances, starting from the **functions** page: - -1. Select the **project** you would like to deploy. - -2. Click the **deploy** button at the top right. - -3. A list of instances (excluding the current instance) within the organization will be displayed in tabular with the following information: - - **Instance Name**: The name used to describe the instance. - - **Instance URL**: The URL used to access the instance. - - **CF Capable**: Describes if the instance version supports Custom Functions (yes/no). 
- - **CF Enabled**: Describes if Custom Functions are configured and enabled on the instance (yes/no). - - **Has Project**: Describes if the selected Custom Functions project has been previously deployed to the instance (yes/no). - - **Deploy**: Button used to deploy the project to the instance. - - **Remote**: Button used to remove the project from the instance. _Note, this will only be visible if the project has been previously deployed to the instance._ - -4. In the appropriate instance row, click the **deploy** button. - - _Note, deploying a project will restart the Custom Functions server on the HarperDB instance receiving the deployment and may result in up to 60 seconds of downtime for all Custom Functions._ diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-roles.md b/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-roles.md deleted file mode 100644 index dafc15a8..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-roles.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -HarperDB users can be managed directly through the HarperDB Studio. It is recommended to read through the users & roles documentation to gain a strong understanding of how they operate. - -Instance role configuration is handled through the roles page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **rules** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. 
- -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -2. Enter the role name. - -3. Click the green check mark. - -4. Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -5. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -2. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -3. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be remove if users are associated with it. - -1. Click the minus icon at the top right of the schemas section. - -2. Identify the appropriate role to delete and click the red minus sign in the same row. - -3. Click the red check mark to confirm deletion. 
diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-users.md b/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-users.md deleted file mode 100644 index 1fb3c2ee..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -HarperDB instance clustering and replication can be configured directly through the HarperDB Studio. It is recommended to read through the clustering documentation first to gain a strong understanding of HarperDB clustering behavior. - -Instance user configuration is handled through the **users** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -HarperDB instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -2. Click **Add User**. - -## Edit a User - -HarperDB instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -2. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 2. Click **Update Password**. - -3. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 2. Click **Update Role**. - -4. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. 
- - _This is done for confirmation purposes._ - - 2. Click **Delete User**. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/manage-schemas-browse-data.md b/versioned_docs/version-4.2/administration/harperdb-studio/manage-schemas-browse-data.md deleted file mode 100644 index 8b4061e3..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/manage-schemas-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Schemas / Browse Data ---- - -# Manage Schemas / Browse Data - -Manage instance schemas/tables and browse data in tabular format with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage schemas and tables, add new data, and more. - -## Manage Schemas and Tables - -#### Create a Schema - -1. Click the plus icon at the top right of the schemas section. -2. Enter the schema name. -3. Click the green check mark. - -#### Delete a Schema - -Deleting a schema is permanent and irreversible. Deleting a schema removes all tables and data within it. - -1. Click the minus icon at the top right of the schemas section. -2. Identify the appropriate schema to delete and click the red minus sign in the same row. -3. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired schema from the schemas section. -2. Click the plus icon at the top right of the tables section. -3. Enter the table name. -4. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -5. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. 
Select the desired schema from the schemas section. -2. Click the minus icon at the top right of the tables section. -3. Identify the appropriate table to delete and click the red minus sign in the same row. -4. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the schema/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -2. This expands the search filters. -3. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -2. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 2. Click **Import From URL**. - 3. The CSV will load, and you will be redirected back to browse table data. -3. To upload a CSV file: - 1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 2. Navigate to your desired CSV file and select it. - 3. Click **Insert X Records**, where X is the number of records in your CSV. - 4. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -2. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -3. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -4. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -2. Modify the desired values. 
- - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -3. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -2. Click the **delete icon**. -3. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the schema/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. - -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/organizations.md b/versioned_docs/version-4.2/administration/harperdb-studio/organizations.md deleted file mode 100644 index 83f99150..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -HarperDB Studio organizations provide the ability to group HarperDB Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique HarperDB Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. 
- -An organization is automatically created for you when you sign up for HarperDB Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the HarperDB Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the **Create a New Organization** card. -3. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your HarperDB Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -4. Click Create Organization. - -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Identify the proper organization card and click the trash can icon. -3. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -4. Click the **Do It** button. - -## Manage Users - -HarperDB Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. 
Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. In the **add user** box, enter the new user’s email address. -5. Click **Add User**. - -Users may or may not already be HarperDB Studio users when adding them to an organization. If the HarperDB Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a HarperDB Studio account, they will receive an email welcoming them to HarperDB Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled owner as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Toggle the **Is Owner** switch to the desired status. - ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their HarperDB Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -6. Click **Delete User**. 
- -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_HarperDB billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or user-installed instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -2. Click **Add Coupon**. -3. The coupon will then be available and displayed in the coupons panel. diff --git a/versioned_docs/version-4.2/administration/harperdb-studio/query-instance-data.md b/versioned_docs/version-4.2/administration/harperdb-studio/query-instance-data.md deleted file mode 100644 index 588f7d4f..00000000 --- a/versioned_docs/version-4.2/administration/harperdb-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the HarperDB Studio with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **query** in the instance control bar. -5. Enter your SQL query in the SQL query window. -6. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. 
For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The HarperDB Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.2/administration/jobs.md b/versioned_docs/version-4.2/administration/jobs.md deleted file mode 100644 index e08d0126..00000000 --- a/versioned_docs/version-4.2/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -HarperDB Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. 
For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](https://api.harperdb.io/#0186bc25-b9ae-44e7-bd9e-8edc0f289aa2) - -[csv file load](https://api.harperdb.io/#c4b71011-8a1d-4cb2-8678-31c0363fea5e) - -[csv url load](https://api.harperdb.io/#d1e9f433-e250-49db-b44d-9ce2dcd92d32) - -[import from s3](https://api.harperdb.io/#820b3947-acbe-41f9-858b-2413cabc3a18) - -[delete_records_before](https://api.harperdb.io/#8de87e47-73a8-4298-b858-ca75dc5765c2) - -[export_local](https://api.harperdb.io/#49a02517-ada9-4198-b48d-8707db905be0) - -[export_to_s3](https://api.harperdb.io/#f6393e9f-e272-4180-a42c-ff029d93ddd4) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. - -## Managing Jobs - -To check on a job's status, use the [get_job](https://api.harperdb.io/#d501bef7-dbb7-4714-b535-e466f6583dce) operation. 
- -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](https://api.harperdb.io/#4474ca16-e4c2-4740-81b5-14ed98c5eeab) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.2/administration/logging/audit-logging.md b/versioned_docs/version-4.2/administration/logging/audit-logging.md deleted file mode 100644 index f70b4cde..00000000 --- a/versioned_docs/version-4.2/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging - -### Audit log - -The audit log uses a standard HarperDB table to track transactions. 
For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To diable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart HarperDB for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [HarperDB API documentation](../../developers/operations-api/logs). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. One thing of note is that the `read_audit_log` operation gives you the `original_records`. 
- -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.2/administration/logging/index.md b/versioned_docs/version-4.2/administration/logging/index.md deleted file mode 100644 index fa64b5f2..00000000 --- a/versioned_docs/version-4.2/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -HarperDB provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): HarperDB maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): HarperDB uses a standard HarperDB table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. -- [Transaction Logging](logging/transaction-logging): HarperDB stores a verbose history of all transactions logged for specified database tables, including original data records. 
diff --git a/versioned_docs/version-4.2/administration/logging/standard-logging.md b/versioned_docs/version-4.2/administration/logging/standard-logging.md deleted file mode 100644 index 0e56681a..00000000 --- a/versioned_docs/version-4.2/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -HarperDB maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the HarperDB application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` -<timestamp> [<level>] [<thread/ID>] ...[<tags>]: <message> -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. 
- - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, lowering the log level to a more verbose setting (such as `debug` or `trace`) will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -HarperDB clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of HarperDB and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -HarperDB logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. 
- -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the HarperDB API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.2/administration/logging/transaction-logging.md b/versioned_docs/version-4.2/administration/logging/transaction-logging.md deleted file mode 100644 index bf9dfeb3..00000000 --- a/versioned_docs/version-4.2/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -HarperDB offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. HarperDB leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. - -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering). 
- -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.2/deployments/_category_.json b/versioned_docs/version-4.2/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.2/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.2/deployments/configuration.md b/versioned_docs/version-4.2/deployments/configuration.md deleted file mode 100644 index 9b4ff5d8..00000000 --- a/versioned_docs/version-4.2/deployments/configuration.md +++ /dev/null @@ -1,745 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -HarperDB is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the operations API root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -All available configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase: `operationsApi`. - -To change a configuration value edit the `harperdb-config.yaml` file and save any changes. HarperDB must be restarted for changes to take effect. - -Alternately, configuration can be changed via environment and/or command line variables or via the API. 
To access lower level elements, use underscores to append parent/child elements (when used this way elements are case insensitive): - -``` -- Environment variables: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variables: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Calling `set_configuration` through the API: `operationsApi_network_port: 9925` -``` - -\_Note: Component configuration cannot be added or updated via CLI or ENV variables. - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install HarperDB overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -HarperDB is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using HarperDB to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using HarperDB behind a proxy server or application server, all the remote ip addresses will be the same and HarperDB will effectively only run on a single thread. 
Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the HarperDB component server uses for HTTPS connections. This requires a valid certificate and key. 
- -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - ---- - -### `threads` - -`threads` - _Type_: number; _Default_: One less than the number of logical cores/ processors - -The `threads` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because HarperDB does have other threads at work), assuming HarperDB is the main service on a server. - -```yaml -threads: 11 -``` - ---- - -### `clustering` - -The `clustering` section configures the clustering engine, this is used to replicate data between instances of HarperDB. - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. 
See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the HarperDB mesh network and discovery service. - -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. 
- -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. - -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -\ - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. 
- -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your HarperDB cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. 
However, there is additional overhead with republishing, and setting this to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special HarperDB user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local HarperDB Studio, a simplified GUI for HarperDB hosted on the server. A more comprehensive GUI is hosted by HarperDB at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or HarperDB Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enables the local studio or not. - -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures HarperDB logging across all HarperDB functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. 
Logging of application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root`). - -In addition, structured logging of data changes are also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enabled table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether or not to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: error - -Control the verbosity of text event logs. - -```yaml -logging: - level: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`. - -`root` - _Type_: string; _Default_: \/log - -The path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: false - -Enables logging rotation. 
- -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log HarperDB logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in HarperDB. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any HarperDB servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. 
- -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 30d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the HarperDB Operations API.\ -All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the HarperDB operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the HarperDB operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. 
- -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -#### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -#### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The HarperDB database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the HarperDB application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -#### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: false - -The `compression` option enables compression of records in the database. 
This can be helpful for very large databases in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. - -```yaml -storage: - compression: false -``` - -`noReadAhead` - _Type_: boolean; _Default_: true - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization, except in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/schema` - -The `path` configuration sets where all database files should reside. - -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by HarperDB. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - ---- - -#### `tls` - -Transport Layer Security - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. 
- ---- - -#### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level.\ -\ -This configuration should be set before the database and table have been created.\ -\ -The configuration will not create the directories in the path, that must be done by the user.\ - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -\ - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. 
- -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` diff --git a/versioned_docs/version-4.2/deployments/harperdb-cli.md b/versioned_docs/version-4.2/deployments/harperdb-cli.md deleted file mode 100644 index 3a1b0428..00000000 --- a/versioned_docs/version-4.2/deployments/harperdb-cli.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: HarperDB CLI ---- - -# HarperDB CLI - -The HarperDB command line interface (CLI) is used to administer [self-installed HarperDB instances](./install-harperdb/). - -## Installing HarperDB - -To install HarperDB with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, HarperDB installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. 
- -#### Environment Variables - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -#### Command Line Arguments - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -## Starting HarperDB - -To start HarperDB after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -## Stopping HarperDB - -To stop HarperDB once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -## Restarting HarperDB - -To restart HarperDB once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -## Getting the HarperDB Version - -To check the version of HarperDB that is installed run the following command: - -```bash -harperdb version -``` - -## Get all available CLI commands - -To display all available HarperDB CLI commands along with a brief description run: - -```bash -harperdb help -``` - -## Get the status of HarperDB and clustering - -To display the status of the HarperDB process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - -## Backups - -HarperDB uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that HarperDB maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a HarperDB database. Database files are stored in the hdb/database directory. 
As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with HarperDB shut down), and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields an unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use.
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | ----------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harperdb-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harperdb-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harperdb-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.2/deployments/harperdb-cloud/index.md b/versioned_docs/version-4.2/deployments/harperdb-cloud/index.md deleted file mode 100644 index e5596387..00000000 --- a/versioned_docs/version-4.2/deployments/harperdb-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: HarperDB Cloud ---- - -# HarperDB Cloud - -[HarperDB Cloud](https://studio.harperdb.io/) is the easiest way to test drive HarperDB, it’s HarperDB-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. HarperDB Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new HarperDB Cloud instance in the HarperDB Studio. diff --git a/versioned_docs/version-4.2/deployments/harperdb-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.2/deployments/harperdb-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 676d536d..00000000 --- a/versioned_docs/version-4.2/deployments/harperdb-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While HarperDB Cloud bills by RAM, each instance has other specifications associated with the RAM selection. 
The following table describes each instance size in detail\*. - -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.2/deployments/harperdb-cloud/iops-impact.md b/versioned_docs/version-4.2/deployments/harperdb-cloud/iops-impact.md deleted file mode 100644 index e2591631..00000000 --- a/versioned_docs/version-4.2/deployments/harperdb-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -HarperDB, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running HarperDB. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that HarperDB performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers HarperDB Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## HarperDB Cloud Storage - -HarperDB Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all HarperDB Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for HarperDB Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. 
A reliable method is to estimate about two IOPS for every index, including the primary key itself. So if a table has two indices besides the primary key, estimate that an insert or update will require about six IOPS. Note that this can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again, it is best to test to verify this with application-specific data and write patterns.
A good rule to follow is that any HarperDB operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to HarperDB’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, HarperDB should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index 9c84cefa..00000000 --- a/versioned_docs/version-4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your HarperDB instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -HarperDB on Verizon 5G Wavelength brings HarperDB closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from HarperDB to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -HarperDB 5G Wavelength Instance Specs While HarperDB 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## HarperDB 5G Wavelength Storage - -HarperDB 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of HarperDB, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger HarperDB volume. Learn more about the [impact of IOPS on performance here](./iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer; make sure to always use the latest minor/patch release for the major version):
- -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harperdb/linux). - -## With Docker - -If you would like to run HarperDB in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a HarperDB container. - -## Offline Install - -If you need to install HarperDB on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -For more information visit the [HarperDB Command Line Interface](./harperdb-cli) guide. - -## Installation on Less Common Platforms - -HarperDB comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.2/deployments/install-harperdb/linux.md b/versioned_docs/version-4.2/deployments/install-harperdb/linux.md deleted file mode 100644 index 4786dbd8..00000000 --- a/versioned_docs/version-4.2/deployments/install-harperdb/linux.md +++ /dev/null @@ -1,212 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install HarperDB. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to HarperDB with sudo privileges exists -1. An additional volume for storing HarperDB files is attached to the Linux instance -1. Traffic to ports 9925 (HarperDB Operations API) 9926 (HarperDB Application Interface) and 9932 (HarperDB Clustering) is permitted - -While you will need to access HarperDB through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. 
- ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start HarperDB - -Here is an example of installing HarperDB with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing HarperDB with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -HarperDB will automatically start after installation. If you wish HarperDB to start when the OS boots, you have two options - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=HarperDB - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [HarperDB Command Line Interface guide](../../deployments/harperdb-cli) and the [HarperDB Configuration File guide](../../deployments/configuration). diff --git a/versioned_docs/version-4.2/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.2/deployments/upgrade-hdb-instance.md deleted file mode 100644 index 3fd61cfd..00000000 --- a/versioned_docs/version-4.2/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: Upgrade a HarperDB Instance ---- - -# Upgrade a HarperDB Instance - -This document describes best practices for upgrading self-hosted HarperDB instances. HarperDB can be upgraded using a combination of npm and built-in HarperDB upgrade scripts. 
HarperDB supports Node.js versions 14.0.0 and higher; however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command:
Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that HarperDB is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure HarperDB is not running: - -```bash -harperdb stop -``` - -Uninstall HarperDB. Note, this step is not required, but will clean up old artifacts of HarperDB. We recommend removing all other HarperDB installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install HarperDB globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start HarperDB - -```bash -harperdb start -``` diff --git a/versioned_docs/version-4.2/developers/_category_.json b/versioned_docs/version-4.2/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.2/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.2/developers/applications/caching.md b/versioned_docs/version-4.2/developers/applications/caching.md deleted file mode 100644 index fe0465fd..00000000 --- a/versioned_docs/version-4.2/developers/applications/caching.md +++ /dev/null @@ -1,274 +0,0 @@ ---- -title: Caching ---- - -# Caching - -HarperDB has integrated support for caching data. With built-in caching capabilities and distributed high-performance low-latency responsiveness, HarperDB makes an ideal data caching server. 
HarperDB can store cached data as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. HarperDB also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, HarperDB is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](./defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. 
These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: expiration - The amount of time until a record goes stale. eviction - The amount of time after expiration before a record can be evicted (defaults to zero). scanInterval - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). - -## Define External Data Source - -Next, you need to define the source for your cache. External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the HarperDB environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyTable } = tables; -MyTable.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected cache. 
If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), HarperDB will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -HarperDB handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). 
Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. 
For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d+)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). - -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records.
Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - do { - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - } while(true); - } - async get() { -... -``` - -Notification events should always include an `id` to indicate the primary key of the updated record. The event should have a `value` for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). -- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. 
These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, HarperDB will only run the subscribe method on one thread. HarperDB is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... -``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it.
A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstream caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in HarperDB. Caching tables also have [subscription capabilities](./caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of HarperDB, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to the underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ...
-``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Caching with Replication - -Caching tables can be configured to replicate in HarperDB clusters. When replicating caching tables, there are a couple of options. If each node will be separately connected to the data source and you do not need the subscription data notification events to replicate, you can set the `replicationSource` to `false`. In this case, only data requests (that come through standard requests like REST interface or operations API), will be replicated.
However, if your data notifications will only be delivered to a single node (at once) and you need the subscription data notification events to replicate, you can set the `replicationSource` to `true` and the incoming events from the subscription will be replicated to all other nodes: - -```javascript -MyTable.sourcedFrom(ThirdPartyAPI, { replicationSource: true }); -``` - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which should also include comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - async get() { - let post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json(); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors.
When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note, that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). diff --git a/versioned_docs/version-4.2/developers/applications/debugging.md b/versioned_docs/version-4.2/developers/applications/debugging.md deleted file mode 100644 index d37d9074..00000000 --- a/versioned_docs/version-4.2/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -HarperDB components and applications run inside the HarperDB process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. 
Debugging can be performed by launching the HarperDB entry script with your IDE, or you can start HarperDB in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run HarperDB in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. - -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use HarperDB's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by HarperDB. This logger can be used to output messages directly to the HarperDB log using standardized logging level functions, described below. The log level can be set in the [HarperDB Configuration File](../../deployments/configuration). - -HarperDB Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. 
- -## Viewing the Log - -The HarperDB Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the [Studio Status page](../../administration/harperdb-studio/instance-metrics). Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the HarperDB log. diff --git a/versioned_docs/version-4.2/developers/applications/define-routes.md b/versioned_docs/version-4.2/developers/applications/define-routes.md deleted file mode 100644 index b6fd78a7..00000000 --- a/versioned_docs/version-4.2/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -HarperDB's applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with HarperDB's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**Custom Functions Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. 
- -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to HarperDB. The same result could have been achieved by hitting the core HarperDB API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against HarperDB, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to HarperDB- the exact same result could have been achieved by hitting the core HarperDB API.
But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. - -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against HarperDB directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard HarperDB Operations API (for example, `hdbCore.preValidation[1](./req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with HarperDB using the operations API. The `request.body` should contain a standard HarperDB operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. 
- -- **requestWithoutAuthentication** - - Executes a request against HarperDB without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.2/developers/applications/defining-schemas.md b/versioned_docs/version-4.2/developers/applications/defining-schemas.md deleted file mode 100644 index 3d7f6cbe..00000000 --- a/versioned_docs/version-4.2/developers/applications/defining-schemas.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in HarperDB's using GraphQL schema definitions. Schemas definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. 
The [introduction to applications](./) provides a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allow data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties). For example, some Dog records could also optionally include a `favoriteTrick` property. - -In this page, we will describe the specific directives that HarperDB uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables is defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database.
-- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoints, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself.. - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated with a UUID if no primary key is provided. - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). 
- -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity. - -### Field Types - -HarperDB supports the following field types in addition to user defined (object) types: - -- String: String/text. -- Int: A 32-bit signed integer (from -2147483648 to 2147483647). -- Long: A 54-bit signed integer (from -9007199254740992 to 9007199254740992). -- Float: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available). -- Boolean: true or false. -- ID: A string (but indicates it is not intended to be legible). -- Any: Any primitive, object, or array is allowed. -- Date: A Date object. - -#### Renaming Tables - -It is important to note that HarperDB does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. 
diff --git a/versioned_docs/version-4.2/developers/applications/example-projects.md b/versioned_docs/version-4.2/developers/applications/example-projects.md deleted file mode 100644 index 1b90c862..00000000 --- a/versioned_docs/version-4.2/developers/applications/example-projects.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Example Projects ---- - -# Example Projects - -**Library of example HarperDB applications and components:** - -- [Authorization in HarperDB using Okta Customer Identity Cloud](https://www.harperdb.io/post/authorization-in-harperdb-using-okta-customer-identity-cloud), by Yitaek Hwang - -- [How to Speed Up your Applications by Caching at the Edge with HarperDB](https://dev.to/doabledanny/how-to-speed-up-your-applications-by-caching-at-the-edge-with-harperdb-3o2l), by Danny Adams - -- [OAuth Authentication in HarperDB using Auth0 & Node.js](https://www.harperdb.io/post/oauth-authentication-in-harperdb-using-auth0-and-node-js), by Lucas Santos - -- [How To Create a CRUD API with Next.js & HarperDB Custom Functions](https://www.harperdb.io/post/create-a-crud-api-w-next-js-harperdb), by Colby Fayock - -- [Build a Dynamic REST API with Custom Functions](https://harperdb.io/blog/build-a-dynamic-rest-api-with-custom-functions/), by Terra Roush - -- [How to use HarperDB Custom Functions to Build your Entire Backend](https://dev.to/andrewbaisden/how-to-use-harperdb-custom-functions-to-build-your-entire-backend-a2m), by Andrew Baisden - -- [Using TensorFlowJS & HarperDB Custom Functions for Machine Learning](https://harperdb.io/blog/using-tensorflowjs-harperdb-for-machine-learning/), by Kevin Ashcraft - -- [Build & Deploy a Fitness App with Python & HarperDB](https://www.youtube.com/watch?v=KMkmA4i2FQc), by Patrick Löber - -- [Create a Discord Slash Bot using HarperDB Custom Functions](https://geekysrm.hashnode.dev/discord-slash-bot-with-harperdb-custom-functions), by Soumya Ranjan Mohanty - -- [How I used HarperDB Custom Functions to Build a Web App for 
my Newsletter](https://blog.hrithwik.me/how-i-used-harperdb-custom-functions-to-build-a-web-app-for-my-newsletter), by Hrithwik Bharadwaj - -- [How I used HarperDB Custom Functions and Recharts to create Dashboard](https://blog.greenroots.info/how-to-create-dashboard-with-harperdb-custom-functions-and-recharts), by Tapas Adhikary - -- [How To Use HarperDB Custom Functions With Your React App](https://dev.to/tyaga001/how-to-use-harperdb-custom-functions-with-your-react-app-2c43), by Ankur Tyagi - -- [Build a Web App Using HarperDB’s Custom Functions](https://www.youtube.com/watch?v=rz6prItVJZU), livestream by Jaxon Repp - -- [How to Web Scrape Using Python, Snscrape & Custom Functions](https://hackernoon.com/how-to-web-scrape-using-python-snscrape-and-harperdb), by Davis David - -- [What’s the Big Deal w/ Custom Functions](https://rss.com/podcasts/harperdb-select-star/278933/), Select\* Podcast diff --git a/versioned_docs/version-4.2/developers/applications/index.md b/versioned_docs/version-4.2/developers/applications/index.md deleted file mode 100644 index a161d960..00000000 --- a/versioned_docs/version-4.2/developers/applications/index.md +++ /dev/null @@ -1,376 +0,0 @@ ---- -title: Applications ---- - -# Applications - -## Overview of HarperDB Applications - -HarperDB is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of HarperDB instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that HarperDB 4.2 and greater provides by building a HarperDB component, a fundamental building-block of the HarperDB ecosystem. - -When working through this guide, we recommend you use the [HarperDB Application Template](https://github.com/HarperDB/application-template) repo as a reference. 
- -## Understanding the Component Application Architecture - -HarperDB provides several types of components. Any package that is added to HarperDB is called a "component", and components are generally categorized as either "applications", which deliver a set of endpoints for users, or "extensions", which are building blocks for features like authentication, additional protocols, and connectors that can be used by other components. Components can be added to the `hdb/components` directory and will be loaded by HarperDB when it starts. Components that are remotely deployed to HarperDB (through the studio or the operation API) are installed into the hdb/node_modules directory. Using `harperdb run .` or `harperdb dev .` allows us to specifically load a certain application in addition to any that have been manually added to `hdb/components` or installed in `node_modules`. - -```mermaid -flowchart LR - Client(Client)-->Endpoints - Client(Client)-->HTTP - Client(Client)-->Extensions - subgraph HarperDB - direction TB - Applications(Applications)-- "Schemas" --> Tables[(Tables)] - Applications-->Endpoints[/Custom Endpoints/] - Applications-->Extensions - Endpoints-->Tables - HTTP[/REST/HTTP/]-->Tables - Extensions[/Extensions/]-->Tables - end -``` - -## Getting up and Running - -### Pre-Requisites - -We assume you are running HarperDB version 4.2 or greater, which supports HarperDB Application architecture (in previous versions, this is 'custom functions'). - -### Scaffolding our Application Directory - -Let's create and initialize a new directory for our application. It is recommended that you start by using the [HarperDB application template](https://github.com/HarperDB/application-template). Assuming you have `git` installed, you can create your project directory by cloning: - -```shell -> git clone https://github.com/HarperDB/application-template my-app -> cd my-app -``` - -
- -You can also start with an empty application directory if you'd prefer. - -To create your own application from scratch, you'll may want to initialize it as an npm package with the \`type\` field set to \`module\` in the \`package.json\` so that you can use the EcmaScript module syntax used in this tutorial: - -```shell -> mkdir my-app -> cd my-app -> npm init -y esnext -``` - -
- -
- -If you want to version control your application code, you can adjust the remote URL to your repository. - -Here's an example for a github repo: - -```shell -> git remote set-url origin git@github.com:// -``` - -Locally developing your application and then committing your app to a source control is a great way to manage your code and configuration, and then you can [directly deploy from your repository](#deploying-your-application). - -
- -## Creating our first Table - -The core of a HarperDB application is the database, so let's create a database table! - -A quick and expressive way to define a table is through a [GraphQL Schema](https://graphql.org/learn/schema). Using your editor of choice, edit the file named `schema.graphql` in the root of the application directory, `my-app`, that we created above. To create a table, we will need to add a `type` of `@table` named `Dog` (and you can remove the example table in the template): - -```graphql -type Dog @table { - # properties will go here soon -} -``` - -And then we'll add a primary key named `id` of type `ID`: - -_(Note: A GraphQL schema is a fast method to define tables in HarperDB, but you are by no means required to use GraphQL to query your application, nor should you necessarily do so)_ - -```graphql -type Dog @table { - id: ID @primaryKey -} -``` - -Now we tell HarperDB to run this as an application: - -```shell -> harperdb dev . # tell HarperDB cli to run current directory as an application in dev mode -``` - -HarperDB will now create the `Dog` table and its `id` attribute we just defined. Not only is this an easy way to get create a table, but this schema is included in our application, which will ensure that this table exists wherever we deploy this application (to any HarperDB instance). - -## Adding Attributes to our Table - -Next, let's expand our `Dog` table by adding additional typed attributes for dog `name`, `breed` and `age`. - -```graphql -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} -``` - -This will ensure that new records must have these properties with these types. - -Because we ran `harperdb dev .` earlier (dev mode), HarperDB is now monitoring the contents of our application directory for changes and reloading when they occur. 
This means that once we save our schema file with these new attributes, HarperDB will automatically reload our application, read `my-app/schema.graphql` and update the `Dog` table and attributes we just defined. The dev mode will also ensure that any logging or errors are immediately displayed in the console (rather only in the log file). - -As a NoSQL database, HarperDB supports heterogeneous records (also referred to as documents), so you can freely specify additional properties on any record. If you do want to restrict the records to only defined properties, you can always do that by adding the `sealed` directive: - -```graphql -type Dog @table @sealed { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -If you are using HarperDB Studio, we can now [add JSON-formatted records](../administration/harperdb-studio/manage-schemas-browse-data) to this new table in the studio or upload data as [CSV from a local file or URL](../administration/harperdb-studio/manage-schemas-browse-data#load-csv-data). A third, more advanced, way to add data to your database is to use the [operations API](./operations-api), which provides full administrative control over your new HarperDB instance and tables. - -## Adding an Endpoint - -Now that we have a running application with a database (with data if you imported any data), let's make this data accessible from a RESTful URL by adding an endpoint. To do this, we simply add the `@export` directive to our `Dog` table: - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -By default the application HTTP server port is `9926` (this can be [configured here](../deployments/configuration#http)), so the local URL would be [http://localhost:9926/Dog/](http://localhost:9926/Dog/) with a full REST API. 
We can PUT or POST data into this table using this new path, and then GET or DELETE from it as well (you can even view data directly from the browser). If you have not added any records yet, we could use a PUT or POST to add a record. PUT is appropriate if you know the id, and POST can be used to assign an id: - -```http -POST /Dog/ -Content-Type: application/json - -{ - "name": "Harper", - "breed": "Labrador", - "age": 3, - "tricks": ["sits"] -} -``` - -With this a record will be created and the auto-assigned id will be available through the `Location` header. If you added a record, you can visit the path `/Dog/` to view that record. Alternately, the curl command `curl http://localhost:9926/Dog/` will achieve the same thing. - -## Authenticating Endpoints - -These endpoints automatically support `Basic`, `Cookie`, and `JWT` authentication methods. See the documentation on [security](./security) for more information on different levels of access. - -By default, HarperDB also automatically authorizes all requests from loopback IP addresses (from the same computer) as the superuser, to make it simple to interact for local development. If you want to test authentication/authorization, or enforce stricter security, you may want to disable the [`authentication.authorizeLocal` setting](../deployments/configuration#authentication). - -### Content Negotiation - -These endpoints support various content types, including `JSON`, `CBOR`, `MessagePack` and `CSV`. Simply include an `Accept` header in your requests with the preferred content type. We recommend `CBOR` as a compact, efficient encoding with rich data types, but `JSON` is familiar and great for web application development, and `CSV` can be useful for exporting data to spreadsheets or other processing. 
- -HarperDB works with other important standard HTTP headers as well, and these endpoints are even capable of caching interaction: - -``` -Authorization: Basic -Accept: application/cbor -If-None-Match: "etag-id" # browsers can automatically provide this -``` - -## Querying - -Querying your application database is straightforward and easy, as tables exported with the `@export` directive are automatically exposed via [REST endpoints](./rest). Simple queries can be crafted through [URL query parameters](https://en.wikipedia.org/wiki/Query_string). - -In order to maintain reasonable query speed on a database as it grows in size, it is critical to select and establish the proper indexes. So, before we add the `@export` declaration to our `Dog` table and begin querying it, let's take a moment to target some table properties for indexing. We'll use `name` and `breed` as indexed table properties on our `Dog` table. All we need to do to accomplish this is tag these properties with the `@indexed` directive: - -```graphql -type Dog @table { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -And finally, we'll add the `@export` directive to expose the table as a RESTful endpoint - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -Now we can start querying. Again, we just simply access the endpoint with query parameters (basic GET requests), like: - -``` -http://localhost:9926/Dog/?name=Harper -http://localhost:9926/Dog/?breed=Labrador -http://localhost:9926/Dog/?breed=Husky&name=Balto&select=id,name,breed -``` - -Congratulations, you now have created a secure database application backend with a table, a well-defined structure, access controls, and a functional REST endpoint with query capabilities! 
See the [REST documentation for more information on HTTP access](./rest) and see the [Schema reference](./applications/defining-schemas) for more options for defining schemas. - -## Deploying your Application - -This guide assumes that you're building a HarperDB application locally. If you have a cloud instance available, you can deploy it by doing the following: - -- Commit and push your application component directory code (i.e., the `my-app` directory) to a Github repo. In this tutorial we started with a clone of the application-template. To commit and push to your own repository, change the origin to your repo: `git remote set-url origin git@github.com:your-account/your-repo.git` -- Go to the applications section of your target cloud instance in the [HarperDB Studio](https://studio.harperdb.io) -- In the left-hand menu of the applications IDE, click 'deploy' and specify a package location reference that follows the [npm package specification](https://docs.npmjs.com/cli/v8/using-npm/package-spec) (i.e., a string like `HarperDB/Application-Template` or a URL like `https://github.com/HarperDB/application-template`, for example, that npm knows how to install). - -You can also deploy your application from your repository by directly using the [`deploy_component` operation](./operations-api/components#deploy-component). - -Once you have deployed your application to a HarperDB cloud instance, you can start scaling your application by adding additional instances in other regions. - -With the help of a global traffic manager/load balancer configured, you can distribute incoming requests to the appropriate server. You can deploy and re-deploy your application to all the nodes in your mesh. - -Now, with an application that you can deploy, update, and re-deploy, you have an application that is horizontally and globally scalable! - -## Custom Functionality with JavaScript - -So far we have built an application entirely through schema configuration. 
However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in HarperDB. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. In HarperDB, data is accessed through our [Resource API](../reference/resource), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). 
To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the HarperDB provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - get(query) { - this.humanAge = 15 + this.age * 5; // silly calculation of human age equivalent - return super.get(query); - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age`, will get the value from the underlying record. And changing or assigning new properties can be saved or included in the resource as it returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -And next we will use this table in our `get()` method. We will call the new table's (static) `get()` method to retrieve a breed by id. To do this correctly, we access the table using our current context by passing in `this` as the second argument. 
This is important because it ensures that we are accessing the data atomically, in a consistent snapshot across tables. This provides automatically tracking of most recently updated timestamps across resources for caching purposes. This allows for sharing of contextual metadata (like user who requested the data), and ensure transactional atomicity for any writes (not needed in this get operation, but important for other operations). The resource methods are automatically wrapped with a transaction (will commit/finish when the method completes), and this allows us to fully utilize multiple resources in our current transaction. With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -//resource.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - async get(query) { - let breedDescription = await Breed.get(this.breed, this); - this.breedDescription = breedDescription; - return super.get(query); - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. the POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. 
We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - async post(data) { - if (data.action === 'add-trick') this.tricks.push(data.trick); - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data on to the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being update. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post` method or `put` method to do this, but we may want to separate the logic so these methods can be called separately without authorization checks. The [Resource API](../reference/resource) defines `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete`, or to easily configure individual capabilities. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - allowUpdate(user) { - return this.owner === user.username; - } -} -``` - -Any methods that are not defined will fall back to HarperDB's default authorization procedure based on users' roles. 
If you are using/extending a table, this is based on HarperDB's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.json -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to url like / to be directly resolved to this resource. - -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get() { - return (await fetch(`https://best-dog-site.com/${this.getId()}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](./applications/caching) provides much more information on how to use HarperDB's powerful caching capabilities and set up data sources. - -HarperDB provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../reference/globals) and the [Resource interface](../reference/resource). - -## Configuring Applications/Components - -Every application or component can define their own configuration in a `config.yaml`. 
If you are using the application template, you will have a [default configuration in this config file](https://github.com/HarperDB/application-template/blob/main/config.yaml) (which is default configuration if no config file is provided). Within the config file, you can configure how different files and resources are loaded and handled. The default configuration file itself is documented with directions. Each entry can specify any `files` that the loader will handle, and can also optionally specify what, if any, URL `path`s it will handle. A path of `/` means that the root URLs are handled by the loader, and a path of `.` indicates that the URLs that start with this application's name are handled. - -This config file allows you define a location for static files, as well (that are directly delivered as-is for incoming HTTP requests). - -Each configuration entry can have the following properties, in addition to properties that may be specific to the individual component: - -- `files`: This specifies the set of files that should be handled the component. This is a glob pattern, so a set of files can be specified like "directory/\*\*". -- `path`: This is the URL path that is handled by this component. -- `root`: This specifies the root directory for mapping file paths to the URLs. For example, if you want all the files in `web/**` to be available in the root URL path via the static handler, you could specify a root of `web`, to indicate that the web directory maps to the root URL path. -- `package`: This is used to specify that this component is a third party package, and can be loaded from the specified package reference (which can be an NPM package, Github reference, URL, etc.). - -## Define Fastify Routes - -Exporting resource will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. HarperDB includes a resource plugin for defining routes with the Fastify web framework. 
Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. See the [defining routes documentation](./applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as HarperDB's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the HarperDB's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, HarperDB will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.2/developers/components/drivers.md b/versioned_docs/version-4.2/developers/components/drivers.md deleted file mode 100644 index 3296031f..00000000 --- a/versioned_docs/version-4.2/developers/components/drivers.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Drivers -description: >- - Industry standard tools to real-time HarperDB data with BI, analytics, - reporting and data visualization technologies. ---- - -# Drivers - -
DriverDocsDownload
Power BIPowerBI DocsWindows
TableauTableau DocsWindows
Mac
Driver JAR
ExcelExcel DocsWindows
JDBCJDBC DocsWindows
Mac
Driver JAR
ODBCODBC DocsWindows
Mac
Linux (RPM)
Linux (DEB)
ADOADO DocsWindows
CmdletsCmdlets DocsWindows
SSISSSIS DocsWindows
diff --git a/versioned_docs/version-4.2/developers/components/google-data-studio.md b/versioned_docs/version-4.2/developers/components/google-data-studio.md deleted file mode 100644 index 4ee8d848..00000000 --- a/versioned_docs/version-4.2/developers/components/google-data-studio.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Google Data Studio ---- - -# Google Data Studio - -[Google Data Studio](https://datastudio.google.com/) is a free collaborative visualization tool which enables users to build configurable charts and tables quickly. The HarperDB Google Data Studio connector seamlessly integrates your HarperDB data with Google Data Studio so you can build custom, real-time data visualizations. - -The HarperDB Google Data Studio Connector is subject to our [Terms of Use](https://harperdb.io/legal/harperdb-cloud-terms-of-service/) and [Privacy Policy](https://harperdb.io/legal/privacy-policy/). - -## Requirements - -The HarperDB database must be accessible through the Internet in order for Google Data Studio servers to access it. The database may be hosted by you or via [HarperDB Cloud](../../deployments/harperdb-cloud/). - -## Get Started - -Get started by selecting the HarperDB connector from the [Google Data Studio Partner Connector Gallery](https://datastudio.google.com/u/0/datasources/create). - -1. Log in to [https://datastudio.google.com/](https://datastudio.google.com/). -1. Add a new Data Source using the HarperDB connector. The current release version can be added as a data source by following this link: [HarperDB Google Data Studio Connector](https://datastudio.google.com/datasources/create?connectorId=AKfycbxBKgF8FI5R42WVxO-QCOq7dmUys0HJrUJMkBQRoGnCasY60_VJeO3BhHJPvdd20-S76g). -1. Authorize the connector to access other servers on your behalf (this allows the connector to contact your database). -1. Enter the Web URL to access your database (preferably with HTTPS), as well as the Basic Auth key you use to access the database. 
Just include the key, not the word "Basic" at the start of it. -1. Check the box for "Secure Connections Only" if you want to always use HTTPS connections for this data source; entering a Web URL that starts with https:// will do the same thing, if you prefer. -1. Check the box for "Allow Bad Certs" if your HarperDB instance does not have a valid SSL certificate. [HarperDB Cloud](../../deployments/harperdb-cloud/) always has valid certificates, and so will never require this to be checked. Instances you set up yourself may require this, if you are using self-signed certs. If you are using [HarperDB Cloud](../../deployments/harperdb-cloud/) or another instance you know should always have valid SSL certificates, do not check this box. -1. Choose your Query Type. This determines what information the configuration will ask for after pressing the Next button. - - Table will ask you for a Schema and a Table to return all fields of using `SELECT *`. - - SQL will ask you for the SQL query you’re using to retrieve fields from the database. You may `JOIN` multiple tables together, and use HarperDB specific SQL functions, along with the usual power SQL grants. -1. When all information is entered correctly, press the Connect button in the top right of the new Data Source view to generate the Schema. You may also want to name the data source at this point. If the connector encounters any errors, a dialog box will tell you what went wrong so you can correct the issue. -1. If there are no errors, you now have a data source you can use in your reports! You may change the types of the generated fields in the Schema view if you need to (for instance, changing a Number field to a specific currency), as well as creating new fields from the report view that do calculations on other fields. - -## Considerations - -- Both Postman and the [HarperDB Studio](../../administration/harperdb-studio/) app have ways to convert a user:password pair to a Basic Auth token. 
Use either to create the token for the connector’s user. - - You may sign out of your current user by going to the instances tab in HarperDB Studio, then clicking on the lock icon at the top-right of a given instance’s box. Click the lock again to sign in as any user. The Basic Auth token will be visible in the Authorization header portion of any code created in the Sample Code tab. -- It’s highly recommended that you create a read-only user role in HarperDB Studio, and create a user with that role for your data sources to use. This prevents that authorization token from being used to alter your database, should someone else ever get ahold of it. -- The RecordCount field is intended for use as a metric, for counting how many instances of a given set of values appear in a report’s data set. -- _Do not attempt to create fields with spaces in their names_ for any data sources! Google Data Studio will crash when attempting to retrieve a field with such a name, producing a System Error instead of a useful chart on your reports. Using CamelCase or snake_case gets around this. diff --git a/versioned_docs/version-4.2/developers/components/index.md b/versioned_docs/version-4.2/developers/components/index.md deleted file mode 100644 index 6fc93eb0..00000000 --- a/versioned_docs/version-4.2/developers/components/index.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Components ---- - -# Components - -HarperDB is a highly extensible database application platform with support for a rich variety of composable modular components and components that can be used and combined to build applications and add functionality to existing applications. HarperDB tools, components, and add-ons can be found in a few places: - -- [SDK libraries](components/sdks) are available for connecting to HarperDB from different languages. -- Drivers are available for connecting to HarperDB from different products and tools. 
-- [HarperDB-Add-Ons repositories](https://github.com/orgs/HarperDB-Add-Ons/repositories) lists various templates and add-ons for HarperDB. -- [HarperDB repositories](https://github.com/orgs/HarperDB-Add-Ons/repositories) include additional tools for HarperDB. -- You can also [search github.com for ever-growing list of projects that use, or work with, HarperDB](https://github.com/search?q=harperdb&type=repositories) -- [Google Data Studio](components/google-data-studio) is a visualization tool for building charts and tables from HarperDB data. - -## Components - -There are four general categories of components for HarperDB. The most common is applications. Applications are simply a component that delivers complete functionality through an external interface that it defines, and is usually composed of other components. See [our guide to building applications for getting started](../../developers/applications). - -A data source component can implement the Resource API to customize access to a table or provide access to an external data source. External data source components are used to retrieve and access data from other sources. - -The next two are considered extension components. Server protocol extension components provide and define ways for clients to access data and can be used to extend or create new protocols. - -Server resource components implement support for different types of files that can be used as resources in applications. HarperDB includes support for using JavaScript modules and GraphQL Schemas as resources, but resource components may add support for different file types like HTML templates (like JSX), CSV data, and more. 
- -## Server components - -Server components can be easily be added and configured by simply adding an entry to your harperdb-config.yaml: - -```yaml -my-server-component: - package: 'HarperDB-Add-Ons/package-name' # this can be any valid github or npm reference - port: 4321 -``` - -## Writing Extension Components - -You can write your own extensions to build new functionality on HarperDB. See the [writing extension components documentation](components/writing-extensions) for more information. diff --git a/versioned_docs/version-4.2/developers/components/installing.md b/versioned_docs/version-4.2/developers/components/installing.md deleted file mode 100644 index c9e935d0..00000000 --- a/versioned_docs/version-4.2/developers/components/installing.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Installing ---- - -# Installing - -Components can be easily added by adding a new top level element to your `harperdb-config.yaml` file. - -The configuration comprises two values: - -- component name - can be anything, as long as it follows valid YAML syntax. -- `package` - a reference to your component. - -```yaml -myComponentName: - package: HarperDB-Add-Ons/package -``` - -Under the hood HarperDB is calling npm install on all components, this means that the package value can be any valid npm reference such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from NPM -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -When HarperDB is run or restarted it checks to see if there are any new or updated components. If there are, it will dynamically create a package.json file in the `rootPath` directory and call `npm install`. 
- -NPM will install all the components in `/node_moduels`. - -The package.json file that is created will look something like this. - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -## Installing components using the operations API - -To add a component using the operations API use the `deploy_component` operation. - -```json -{ - "operation": "deploy_component", - "project": "my-cool-component", - "package": "HarperDB-Add-Ons/package/mycc" -} -``` - -Another option is to pass `deploy_component` a base64-encoded string representation of your component as a `.tar` file. HarperDB can generate this via the `package_component` operation. When deploying with a payload, your component will be deployed to your `/components` directory. Any components in this directory will be automatically picked up by HarperDB. - -```json -{ - "operation": "deploy_component", - "project": "my-cool-component", - "payload": "NzY1IAAwMDAwMjQgADAwMDAwMDAwMDAwIDE0NDIwMDQ3...." -} -``` diff --git a/versioned_docs/version-4.2/developers/components/operations.md b/versioned_docs/version-4.2/developers/components/operations.md deleted file mode 100644 index 691ce4bb..00000000 --- a/versioned_docs/version-4.2/developers/components/operations.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Operations ---- - -# Operations - -One way to manage applications and components is through [HarperDB Studio](../../administration/harperdb-studio/). It performs all the necessary operations automatically. 
To get started, navigate to your instance in HarperDB Studio and click the subnav link for "applications". Once configuration is complete, you can manage and deploy applications in minutes. - -HarperDB Studio manages your applications using nine HarperDB operations. You may view these operations within our [API Docs](../operations-api/). A brief overview of each of the operations is below: - -- **components_status** - - Returns the state of the applications server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -- **get_components** - - Returns an array of projects within the applications root project directory. - -- **get_component_file** - - Returns the content of the specified file as text. HarperDB Studio uses this call to render the file content in its built-in code editor. - -- **set_component_file** - - Updates the content of the specified file. HarperDB Studio uses this call to save any changes made through its built-in code editor. - -- **drop_component_file** - - Deletes the specified file. - -- **add_component_project** - - Creates a new project folder in the applications root project directory. It also inserts into the new directory the contents of our applications Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). - -- **drop_component_project** - - Deletes the specified project folder and all of its contents. - -- **package_component_project** - - Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns that string to the user. - -- **deploy_component_project** - - Takes the output of package_component_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the applications root project directory. 
diff --git a/versioned_docs/version-4.2/developers/components/sdks.md b/versioned_docs/version-4.2/developers/components/sdks.md deleted file mode 100644 index 04f87e6f..00000000 --- a/versioned_docs/version-4.2/developers/components/sdks.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: SDKs -description: >- - Software Development Kits available for connecting to HarperDB from different - languages. ---- - -# SDKs - -| SDK/Tool | Description | Installation | -| ------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------- | -| [HarperDB.NET.Client](https://www.nuget.org/packages/HarperDB.NET.Client) | A Dot Net Core client to execute operations against HarperDB | `dotnet add package HarperDB.NET.Client --version 1.1.0` | -| [Websocket Client](https://www.npmjs.com/package/harperdb-websocket-client) | A Javascript client for real-time access to HarperDB transactions | `npm i -s harperdb-websocket-client` | -| [Gatsby HarperDB Source](https://www.npmjs.com/package/gatsby-source-harperdb) | Use HarperDB as the data source for a Gatsby project at the build time | `npm i -s gatsby-source-harperdb` | -| [HarperDB.EntityFrameworkCore](https://www.nuget.org/packages/HarperDB.EntityFrameworkCore) | The HarperDB EntityFrameworkCore Provider Package for .NET 6.0 | `dotnet add package HarperDB.EntityFrameworkCore --version 1.0.0` | -| [Python SDK](https://pypi.org/project/harperdb/) | Python3 implementations of HarperDB API functions with wrappers for an object-oriented interface | `pip3 install harperdb` | -| [HarperDB Flutter SDK](https://github.com/HarperDB/harperdb-sdk-flutter) | A HarperDB SDK for Flutter | `flutter pub add harperdb` | -| [React Hook](https://www.npmjs.com/package/use-harperdb) | A ReactJS Hook for HarperDB | `npm i -s use-harperdb` | -| [Node Red 
Node](https://flows.nodered.org/node/node-red-contrib-harperdb) | Easy drag and drop connections to HarperDB using the Node-Red platform | `npm i -s node-red-contrib-harperdb` | -| [NodeJS SDK](https://www.npmjs.com/package/harperive) | A HarperDB SDK for NodeJS | `npm i -s harperive` | -| [HarperDB Cargo Crate](https://crates.io/crates/harperdb) | A HarperDB SDK for Rust | `Cargo.toml > harperdb = '1.0.0'` | diff --git a/versioned_docs/version-4.2/developers/components/writing-extensions.md b/versioned_docs/version-4.2/developers/components/writing-extensions.md deleted file mode 100644 index 1065f613..00000000 --- a/versioned_docs/version-4.2/developers/components/writing-extensions.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Writing Extensions ---- - -# Writing Extensions - -HarperDB is a highly extensible database application platform with support for a rich variety of composable modular components and extensions that can be used and combined to build applications and add functionality to existing applications. Here we describe the different types of components/extensions that can be developed for HarperDB and how to create them. - -There are three general categories of components for HarperDB: - -- **protocol extensions** that provide and define ways for clients to access data -- **resource extensions** that handle and interpret different types of files -- **consumer data sources** that provide a way to access and retrieve data from other sources. - -Server protocol extensions can be used to implement new protocols like MQTT, AMQP, Kafka, or maybe a retro-style Gopher interface. It can also be used to augment existing protocols like HTTP with "middleware" that can add authentication, analytics, or additional content negotiation, or add layer protocols on top of WebSockets. - -Server resource extensions implement support for different types of files that can be used as resources in applications. 
HarperDB includes support for using JavaScript modules and GraphQL Schemas as resources, but resource extensions could be added to support different file types like HTML templates (like JSX), CSV data, and more. - -Consumer data source components are used to retrieve and access data from other sources, and can be very useful if you want to use HarperDB to cache or use data from other databases like MySQL, Postgres, or Oracle, or subscribe to data from messaging brokers (again possibly Kafka, NATS, etc.). - -These are not mutually exclusive, you may build components that fulfill any or all of these roles. - -## Server Extensions - -Server Extensions are implemented as JavaScript packages/modules and interact with HarperDB through a number of possible hooks. A component can be defined as an extension by specifying the extensionModule in the config.yaml: - -```yaml -extensionModule: './entry-module-name.js' -``` - -### Module Initialization - -Once a user has configured an extension, HarperDB will attempt to load the extension package specified by `package` property. Once loaded, there are several functions that can be exported that will be called by HarperDB: - -`export function start(options: { port: number, server: {}})` If defined, this will be called on the initialization of the extension. The provided `server` property object includes a set of additional entry points for utilizing or layering on top of other protocols (and when implementing a new protocol, you can add your own entry points). The most common entry is to provide an HTTP middleware layer. 
This looks like: - -```javascript -export function start(options: { port: number, server: {}}) { - options.server.http(async (request, nextLayer) => { - // we can directly return a response here, or do some processing on the request and delegate to the next layer - let response = await nextLayer(request); - return response; - }); -} -``` - -Here, the `request` object will have the following structure (this is based on Node's request, but augmented to conform to a subset of the [WHATWG Request API](https://developer.mozilla.org/en-US/docs/Web/API/Request)): - -```typescript -interface Request { - method: string; - headers: Headers; // use request.headers.get(headerName) to get header values - body: Stream; - data: any; // deserialized data from the request body -} -``` - -The returned `response` object should have the following structure (again, following a structural subset of the [WHATWG Response API](https://developer.mozilla.org/en-US/docs/Web/API/Response)): - -```typescript -interface Response { - status?: number; - headers?: {}; // an object with header name/values - data?: any; // object/value that will be serialized into the body - body?: Stream; -} -``` - -If you were implementing an authentication extension, you could get authentication information from the request and use it to add the `user` property to the request: - -```javascript -export function start(options: { port: number, server: {}, resources: Map}) { - options.server.http((request, nextLayer) => { - let authorization = request.headers.authorization; - if (authorization) { - // get some token for the user and determine the user - // if we want to use harperdb's user database - let user = server.getUser(username, password); - request.user = user; // authenticate user object goes on the request - } - // continue on to the next layer - return nextLayer(request); - }); - // if you needed to add a login resource, could add it as well: - resources.set('/login', LoginResource); -} -``` - -If you were 
implementing a new protocol, you can directly interact with the sockets and listen for new incoming TCP connections: - -```javascript -export function start(options: { port: number, server: {}}) { - options.server.socket((socket) => { - }); -}) -``` - -### Resource Handling - -Typically, servers not only communicate with clients, but serve up meaningful data based on the resources within the server. While resource extensions typically handle defining resources, once resources are defined, they can be consumed by server extensions. The `resources` argument provides access to the set of all the resources that have been defined. A server can call `resources.getMatch(path)` to get the resource associated with the URL path. - -## Resource Extensions - -Resource extensions allow us to handle different files and make them accessible to servers as resources, following the common [Resource API](../../reference/resource). To implement a resource extension, you export a function called `handleFile`. Users can then configure which files that should be handled by your extension. For example, if we had implemented an EJS handler, it could be configured as: - -```yaml - module: 'ejs-extension', - path: '/templates/*.ejs' -``` - -And in our extension module, we could implement `handleFile`: - -```javascript -export function handleFile?(contents, relative_path, file_path, resources) { - // will be called for each .ejs file. - // We can then add the generate resource: - resources.set(relative_path, GeneratedResource); -} -``` - -We can also implement a handler for directories. This can be useful for implementing a handler for broader frameworks that load their own files, like Next.js or Remix, or a static file handler. HarperDB includes such an extension for fastify's auto-loader that loads a directory of route definitions. 
This hook looks like: - -```javascript -export function handleDirectory?(relative_path, path, resources) { -} -``` - -Note that these hooks are not mutually exclusive. You can write an extension that implements any or all of these hooks, potentially implementing a custom protocol and file handling. - -## Data Source Components - -Data source components implement the Resource interface to provide access to various data sources, which may be other APIs, databases, or local storage. Components that implement this interface can then be used as a source for caching tables, can be accessed as part of endpoint implementations, or even used as endpoints themselves. See the [Resource documentation](../../reference/resource) for more information on implementing new resources. - -## Content Type Extensions - -HarperDB uses content negotiation to determine how to deserialize content incoming data from HTTP requests (and any other protocols that support content negotiation) and to serialize data into responses. This negotiation is performed by comparing the `Content-Type` header with registered content type handler to determine how to deserialize content into structured data that is processed and stored, and comparing the `Accept` header with registered content type handlers to determine how to serialize structured data. HarperDB comes with a rich set of content type handlers including JSON, CBOR, MessagePack, CSV, Event-Stream, and more. However, you can also add your own content type handlers by adding new entries (or even replacing existing entries) to the `contentTypes` exported map from the `server` global (or `harperdb` export). This map is keyed by the MIME type, and the value is an object with properties (all optional): `serialize(data): Buffer|Uint8Array|string`: If defined, this will be called with the data structure and should return the data serialized as binary data (NodeJS Buffer or Uint8Array) or a string, for the response. 
`serializeStream(data): ReadableStream`: If defined, this will be called with the data structure and should return the data serialized as a ReadableStream. This is generally necessary for handling asynchronous iteratables. `deserialize(Buffer|string): any`: If defined (and deserializeStream is not defined), this will be called with the raw data received from the incoming request and should return the deserialized data structure. This will be called with a string for text MIME types ("text/..."), and a Buffer for all others. `deserializeStream(ReadableStream): any`: If defined (and deserializeStream is not defined), this will be called with the raw data stream received from the incoming request and should return the deserialized data structure (potentially as an asynchronous iterable). `q: number`: This is an indication of this serialization quality between 0 and 1, and if omitted, defaults to 1. It is called "content negotiation" instead of "content demanding" because both client and server may have multiple supported content types, and the server needs to choose the best for both. This is determined by finding the content type (of all supported) with the highest product of client q and server q (1 is a perfect representation of the data, 0 is worst, 0.5 is medium quality). - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` - -## Trusted/Untrusted - -Extensions will also be categorized as trusted or untrusted. For some HarperDB installations, administrators may choose to constrain users to only using trusted extensions for security reasons (such multi-tenancy requirements or added defense in depth). Most installations do not impose such constraints, but this may exist in some situations. 
- -An extension can be automatically considered trusted if it conforms to the requirements of [Secure EcmaScript](https://www.npmjs.com/package/ses/v/0.7.0) (basically strict mode code that doesn't modify any global objects), and either does not use any other modules, or only uses modules from other trusted extensions/components. An extension can be marked as trusted by review by the HarperDB team as well, but developers should not expect that HarperDB can review all extensions. Untrusted extensions can access any other packages/modules, and may have many additional capabilities. diff --git a/versioned_docs/version-4.2/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.2/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index cc4634fc..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created HarperDB will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. 
- -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." -} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.2/developers/operations-api/bulk-operations.md deleted file mode 100644 index b6f6a07f..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into HarperDB - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which HarperDB is running. For example, the path to a CSV on your computer will produce an error if your HarperDB instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running harperdb - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/clustering.md b/versioned_docs/version-4.2/developers/operations-api/clustering.md deleted file mode 100644 index 8dbbff78..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/clustering.md +++ /dev/null @@ -1,413 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional HarperDB instance with associated subscriptions. Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing HarperDB instance registration and associated subscriptions. 
Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a HarperDB instance and associated subscriptions from the cluster. 
Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about HarperDB clustering here: [https://harperdb.io/docs/clustering/](https://harperdb.io/docs/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/components.md b/versioned_docs/version-4.2/developers/operations-api/components.md deleted file mode 100644 index 0abc5406..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/components.md +++ /dev/null @@ -1,314 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a predefined template. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website.\ - -If deploying with the `payload` option, HarperDB will decrypt the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory.\ - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registerd packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. In addition to tags, you can also reference branches or commit numbers. 
Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have an installed SSH keys on the server: - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. 
Must be a string - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. 
- -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete - -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a 
component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. 
Defaults to `utf8` - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/custom-functions.md b/versioned_docs/version-4.2/developers/operations-api/custom-functions.md deleted file mode 100644 index 7b483c8a..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,277 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDB Studio uses this call to render the file content in its built-in code editor. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. HarperDB Studio uses this call to save any changes made through its built-in code editor. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. - -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. 
Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string represenation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.2/developers/operations-api/databases-and-tables.md deleted file mode 100644 index 140f5e53..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,386 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts about 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). 
- -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. - -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. 
The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping - -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. 
If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. 
**The create_attribute operation can be used for admins wishing to pre-define schema values for setting role-based permissions or for any other reason.** - -_Note: HarperDB will automatically create new attributes on insert and update if they do not already exist within the schema._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. 
The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with HarperDB not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (HarperDB actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup HarperDB databases). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/index.md b/versioned_docs/version-4.2/developers/operations-api/index.md deleted file mode 100644 index cb83098b..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/index.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling HarperDB. 
To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../../deployments/configuration), on the root path, where the body is the operations object. These requests need to be authenticated, which can be done with [basic auth](./security/basic-auth) or [JWT authentication](./security/jwt-auth). For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [Utilities](operations-api/utilities) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/jobs.md b/versioned_docs/version-4.2/developers/operations-api/jobs.md 
deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/jobs.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. - -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/logs.md b/versioned_docs/version-4.2/developers/operations-api/logs.md deleted file mode 100644 index aaf0c893..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/logs.md +++ /dev/null @@ -1,732 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read HarperDB Log - -Returns log outputs from the primary HarperDB log based on the provided search criteria. Read more about HarperDB logging here: [https://docs.harperdb.io/docs/4.2/administration/logging#read-logs-via-the-api](../../administration/logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_log` -- `start` _(optional)_ - result to start with. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 100. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. 
Must be `error`, `info`, or `null` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss` -- `order` _(optional)_ - order to display logs desc or asc by timestamp - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.2/administration/logging/transaction-logging#read_transaction_log](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.2/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 
1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.2/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values [`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.2/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the HarperDB user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - 
"original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.2/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the HarperDB configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.2/developers/operations-api/nosql-operations.md deleted file mode 100644 index c21ff8b7..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,360 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. 
The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `search_attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_value` _(required)_ - value you wish to search - wild cards are allowed -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "search_attribute": "owner_name", - "search_value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. The default is `null`, resulting in no limit -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array - - `search_attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_type` _(required)_ - the type of search to perform - `equals`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_value` _(required)_ - case-sensitive value you wish to search. 
If the `search_type` is `between` then use an array of two values to search between (both inclusive) - -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "get_attributes": ["*"], - "conditions": [ - { - "search_attribute": "age", - "search_type": "between", - "search_value": [5, 8] - }, - { - "search_attribute": "weight_lbs", - "search_type": "greater_than", - "search_value": 40 - }, - { - "search_attribute": "adorable", - "search_type": "equals", - "search_value": true - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.2/developers/operations-api/quickstart-examples.md deleted file mode 100644 index 22b16f43..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/quickstart-examples.md +++ 
/dev/null @@ -1,368 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in HarperDB are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. A hash attribute is an attribute that defines the unique identifier for each row in your table. In a traditional RDMS this would be called a primary key. - -HarperDB does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. 
- -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - 
"owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and HarperDB will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -HarperDB supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/registration.md b/versioned_docs/version-4.2/developers/operations-api/registration.md deleted file mode 100644 index 
7812e843..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/registration.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the HarperDB instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Get Fingerprint - -Returns the HarperDB fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -Sets the HarperDB license as generated by HarperDB License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/sql-operations.md b/versioned_docs/version-4.2/developers/operations-api/sql-operations.md deleted file mode 100644 index 6745f1c2..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: SQL Operations ---- - -# SQL Operations - -## Select - -Executes the provided SQL statement. The SELECT statement is used to query data from the database. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/token-authentication.md b/versioned_docs/version-4.2/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. - -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. 
- -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2
UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqkwsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.2/developers/operations-api/users-and-roles.md deleted file mode 100644 index 250d83f7..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. Learn more about HarperDB roles here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. Learn more about HarperDB roles here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of schema names (as strings). If boolean, user can create new schemas and tables. If array of strings, users can only manage tables within the specified schemas. 
This overrides any individual table permissions for specified schemas, or for all schemas if the value is true. - -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "develope3r", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. Learn more about HarperDB roles here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of schema names (as strings). If boolean, user can create new schemas and tables. 
If array of strings, users can only manage tables within the specified schemas. This overrides any individual table permissions for specified schemas, or for all schemas if the value is true. - -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. Learn more about HarperDB roles here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "2ebc3415-0aa0-4eea-9b8e-40860b436119" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. Learn more about HarperDB users here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. Learn more about HarperDB users here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. HarperDB will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your HarperDB instance. If set to false, user will not be able to access your instance of HarperDB. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. Learn more about HarperDB users here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. 
It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. HarperDB will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your HarperDB instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. Learn more about HarperDB users here: [https://harperdb.io/docs/security/users-roles/](https://harperdb.io/docs/security/users-roles/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.2/developers/operations-api/utilities.md b/versioned_docs/version-4.2/developers/operations-api/utilities.md deleted file mode 100644 index b22b8603..00000000 --- a/versioned_docs/version-4.2/developers/operations-api/utilities.md +++ /dev/null @@ -1,376 +0,0 @@ ---- -title: Utilities ---- - -# Utilities - -## Restart - -Restarts the HarperDB instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." 
-} -``` - ---- - -## Restart Service - -Restarts servers for the specified HarperDB service. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` - ---- - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. - -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value` or `sql` - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Install Node Modules - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must be an array of custom function projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` - ---- - -## Set Configuration - -Modifies the HarperDB configuration file parameters. Must follow with a [restart](#restart) or [restart_service](#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the HarperDB configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the HarperDB configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the HarperDB configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "clustering": { - "enabled": true, - "hubServer": { - "cluster": { - "name": "harperdb", - "network": { - "port": 12345, - "routes": null - } - }, - "leafNodes": { - "network": { - "port": 9931 - } - }, - "network": { - "port": 9930 - } - }, - "leafServer": { - "network": { - "port": 9940, - "routes": null - }, - "streams": { - "maxAge": null, - "maxBytes": null, - "maxMsgs": null, - "path": "/Users/hdb/clustering/leaf" - } - }, - "logLevel": "info", - "nodeName": "node1", - "republishMessages": false, - "databaseLevel": false, - "tls": { - "certificate": "/Users/hdb/keys/certificate.pem", - "certificateAuthority": "/Users/hdb/keys/ca.pem", - "privateKey": 
"/Users/hdb/keys/privateKey.pem", - "insecure": true, - "verify": true - }, - "user": "cluster_user" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - "requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "certificate": "/Users/hdb/keys/certificate.pem", - "certificateAuthority": "/Users/hdb/keys/ca.pem", - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` diff --git a/versioned_docs/version-4.2/developers/real-time.md b/versioned_docs/version-4.2/developers/real-time.md deleted file mode 100644 index 4c4c3183..00000000 --- a/versioned_docs/version-4.2/developers/real-time.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -HarperDB provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. HarperDB supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -HarperDB real-time communication is based around database tables. 
Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a HarperDB application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -HarperDB is a database, not a generic broker, and therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -HarperDB supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in HarperDB is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -HarperDB supports MQTT with its `mqtt` server module and HarperDB supports MQTT over standard TCP sockets or over WebSockets. 
This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. - -#### Capabilities - -HarperDB's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In HarperDB topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". - -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. 
When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -HarperDB supports QoS 0 and 1 for publishing and subscribing. - -HarperDB supports multi-level topics, both for subscribing and publishing. HarperDB also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. HarperDB currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -### Ordering - -HarperDB is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. HarperDB is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, HarperDB does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. 
- -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because HarperDB prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). - -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and HarperDB will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as the being the latest and retained message (#2 in this case). - -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). 
Retained messages can be thought of as "superseding" messages, and are a good fit for applications like instrument measurements such as temperature readings, where the priority is to provide the _latest_ temperature (older temperature readings are not important to publish after a new reading), and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. 
For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.2/developers/rest.md b/versioned_docs/version-4.2/developers/rest.md deleted file mode 100644 index 761d25e6..00000000 --- a/versioned_docs/version-4.2/developers/rest.md +++ /dev/null @@ -1,200 +0,0 @@ ---- -title: REST ---- - -# REST - -HarperDB provides a powerful, efficient, 
and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. The name of the query or the [exported](./applications/defining-schemas#export) name of the resource defines the beginning of the endpoint path. From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several different levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). -- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -## GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. 
- -#### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. - -## PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`. - -## DELETE - -This can be used to delete a record or records. - -## `DELETE /my-resource/` - -This will delete a record with the given primary key. 
This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -## `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -## POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are exposed as endpoints, this also can be used to create new records. - -### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -## Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in HarperDB. This can be used to search by a single property name and value, to find all records which provide a value for the given property/attribute. It is important to note that this property must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the properties needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. 
If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /product/?category=software&price=gt=100&price=lt=200 -``` - -HarperDB has several special query functions that use "call" syntax. These can be included in the query string as its own query entry (separated from other query conditions with an `&`). These include: - -### `select(properties)` - -This allows you to specify which properties should be included in the responses. This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. - -To get a list of product names with a category of software: - -```http -GET /product/?category=software&select(name) -``` - -### `limit(start,end)` or `limit(end)` - -Specifies a limit on the number of records returned, optionally providing a starting offset. 
- -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /product?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, HarperDB supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. - -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. 
If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", data: "BEGIN:VCALENDAR\nVERSION:2.0\n... -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. This also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/versioned_docs/version-4.2/developers/security/basic-auth.md b/versioned_docs/version-4.2/developers/security/basic-auth.md deleted file mode 100644 index f3d76e29..00000000 --- a/versioned_docs/version-4.2/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -HarperDB uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -** \_**You do not need to log in separately. 
Basic Auth is added to each HTTP request like create_schema, create_table, insert etc… via headers.**\_ ** - -A header is added to each HTTP request. The header key is **"Authorization"** the header value is **"Basic <<your username and password buffer token>>"** - -## Authentication in HarperDB Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for HarperDB. - -_Note: This function uses btoa. Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.2/developers/security/certificate-management.md b/versioned_docs/version-4.2/developers/security/certificate-management.md deleted file mode 100644 index d669f078..00000000 --- a/versioned_docs/version-4.2/developers/security/certificate-management.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for HarperDB external facing APIs. 
For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of HarperDB does not have HTTPS enabled (see [configuration](../../deployments/configuration) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart HarperDB. - -By default HarperDB will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your HarperDB node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your HarperDB node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable HarperDB HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart HarperDB. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the HarperDB configuration with the path of your new certificate files, and then restart HarperDB. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set HarperDB will default to the values in the `tls` section. 
- -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for HarperDB, Nginx can be used as a reverse proxy for HarperDB. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to HarperDB as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for HarperDB, a number of different external services can be used as a reverse proxy for HarperDB. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to HarperDB as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for HarperDB administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.2/developers/security/configuration.md b/versioned_docs/version-4.2/developers/security/configuration.md deleted file mode 100644 index c8134aac..00000000 --- a/versioned_docs/version-4.2/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -HarperDB was set up to require very minimal configuration to work out of the box. 
There are, however, some best practices we encourage for anyone building an app with HarperDB. - -## CORS - -HarperDB allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, HarperDB enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harperdb.io,https://products.harperdb.io` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -HarperDB provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose HarperDB's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -HarperDB automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. 
- -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. Use operation `harperdb restart` from HarperDB Operations API.** diff --git a/versioned_docs/version-4.2/developers/security/index.md b/versioned_docs/version-4.2/developers/security/index.md deleted file mode 100644 index 0a3b5952..00000000 --- a/versioned_docs/version-4.2/developers/security/index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Security ---- - -# Security - -HarperDB uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [Configuration](security/configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.2/developers/security/jwt-auth.md b/versioned_docs/version-4.2/developers/security/jwt-auth.md deleted file mode 100644 index 4b3ea934..00000000 --- a/versioned_docs/version-4.2/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -HarperDB uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all HarperDB operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. 
The default expiry is thirty days. - -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their HarperDB credentials. The following POST body is sent to HarperDB. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.2/developers/security/users-and-roles.md b/versioned_docs/version-4.2/developers/security/users-and-roles.md deleted file mode 100644 index c2d8fa5d..00000000 --- a/versioned_docs/version-4.2/developers/security/users-and-roles.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -HarperDB utilizes a Role-Based Access Control (RBAC) framework to manage access to HarperDB instances. 
A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in HarperDB - -Role permissions in HarperDB are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a HarperDB instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. HarperDB will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing schemas, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within HarperDB. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a HarperDB instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. 
-- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop schemas & tables. Alternatively the role type can be assigned a string array. The values in this array are schemas and allows the role to only create and drop tables in the designated schemas. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. - -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all schemas, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a HarperDB instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. 
- -Example JSON for `add_role` request - -```jsonc -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "schema_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true, - }, - ], - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [], - }, - }, - }, - }, -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional schema-specific permissions values included since the role will have access to the entire database schema. If permissions are included in the body of the operation, they will be stored within HarperDB, but ignored, as super_users have full access to the database._ - -- `permissions`: Schema tables that a role should have specific CRUD access to should be included in the final, schema-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles, blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its schema in the roles permissions JSON passed to the API (_see example above_). 
- -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a schema and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the schema and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1. If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have not CRUD access (with the exception of the `hash_attribute` described in #2). 
- - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that HarperDB manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. - -## Role-Based Operation Restrictions - -The table below includes all API operations available in HarperDB and indicates whether or not the operation is restricted to super_user roles. 
- -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the schema-level CRUD permissions set for the roles._ - -| Schemas and Tables | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| describe_all | | -| describe_schema | | -| describe_table | | -| create_schema | X | -| drop_schema | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | 
:-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed HarperDB as `<>`. Because HarperDB stores files natively on the operating system, we only allow the HarperDB executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. User_b may not have access to the hdb files HarperDB needs. This also keeps HarperDB more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. 
diff --git a/versioned_docs/version-4.2/getting-started.md b/versioned_docs/version-4.2/getting-started.md deleted file mode 100644 index af4c77bb..00000000 --- a/versioned_docs/version-4.2/getting-started.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Getting Started ---- - -# Getting Started - -HarperDB is designed for quick and simple setup and deployment, with smart defaults that lead to fast, scalable, and globally distributed database applications. - -You can easily create a HarperDB database in the cloud through our studio or install it locally. The quickest way to get HarperDB up and running is with [HarperDB Cloud](./deployments/harperdb-cloud/), our database-as-a-service offering. However, HarperDB is a [database application platform](./developers/applications/), and to leverage HarperDB’s full application development capabilities of defining schemas, endpoints, messaging, and gateway capabilities, you may wish to install and run HarperDB locally so that you can use your standard local IDE tools, debugging, and version control. - -### Installing a HarperDB Instance - -You can simply install HarperDB with npm (or yarn, or other package managers): - -```shell -npm install -g harperdb -``` - -Here we installed HarperDB globally (and we recommend this) to make it easy to run a single HarperDB instance with multiple projects, but you can install it locally (not globally) as well. - -You can run HarperDB by running: - -```javascript -harperdb; -``` - -You can now use HarperDB as a standalone database. You can also create a cloud instance (see below), which is also an easy way to get started. - -#### Developing Database Applications with HarperDB - -HarperDB is more than just a database, with HarperDB you build "database applications" which package your schema, endpoints, and application logic together. 
You can then deploy your application to an entire cluster of HarperDB instances, ready to scale to on-the-edge delivery of data and application endpoints directly to your users. To get started with HarperDB, take a look at our application development guide, with quick and easy examples: - -[Database application development guide](./developers/applications/) - -### Setting up a Cloud Instance - -To set up a HarperDB cloud instance, simply sign up and create a new instance: - -1. [Sign up for the HarperDB Studio](https://studio.harperdb.io/sign-up) -1. [Create a new HarperDB Cloud instance](./administration/harperdb-studio/instances#create-a-new-instance) - -Note that a local instance and cloud instance are not mutually exclusive. You can register your local instance in the HarperDB Studio, and a common development flow is to develop locally and then deploy your application to your cloud instance. - -HarperDB Cloud instance provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -#### Using the HarperDB Studio - -Now that you have a HarperDB instance, if you want to use HarperDB as a standalone database, you can fully administer and interact with our database through the Studio. This section links to appropriate articles to get you started interacting with your data. - -1. [Create a schema](./administration/harperdb-studio/manage-schemas-browse-data#create-a-schema) -1. [Create a table](./administration/harperdb-studio/manage-schemas-browse-data#create-a-table) -1. [Add a record](./administration/harperdb-studio/manage-schemas-browse-data#add-a-record) -1. [Load CSV data](./administration/harperdb-studio/manage-schemas-browse-data#load-csv-data) (Here’s a sample CSV of the HarperDB team’s dogs) -1. 
[Query data via SQL](./administration/harperdb-studio/query-instance-data) - -## Administering HarperDB - -If you are deploying and administering HarperDB, you may want to look at our [configuration documentation](./deployments/configuration) and our administrative operations API below. - -### HarperDB APIs - -The preferred way to interact with HarperDB for typical querying, accessing, and updating data (CRUD) operations is through the REST interface, described in the [REST documentation](./developers/rest). - -The Operations API provides extensive administrative capabilities for HarperDB, and the [Operations API documentation has usage and examples](./developers/operations-api/). Generally it is recommended that you use the RESTful interface as your primary interface for performant data access, querying, and manipulation (DML) for building production applications (under heavy load), and the operations API (and SQL) for data definition (DDL) and administrative purposes. - -The HarperDB Operations API is single endpoint, which means the only thing that needs to change across different calls is the body. For example purposes, a basic cURL command is shown below to create a schema called dev. To change this behavior, swap out the operation in the `data-raw` body parameter. - -``` -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_schema", - "schema": "dev" -}' -``` - -## Support and Learning More - -If you find yourself in need of additional support you can submit a [HarperDB support ticket](https://harperdbhelp.zendesk.com/hc/en-us/requests/new). You can also learn more about available HarperDB projects by searching [Github](https://github.com/search?q=harperdb). 
- -### Video Tutorials - -[HarperDB video tutorials are available on our YouTube channel](https://www.youtube.com/@harperdbio). HarperDB and the HarperDB Studio are constantly changing, as such, there may be small discrepancies in UI/UX. diff --git a/versioned_docs/version-4.2/index.md b/versioned_docs/version-4.2/index.md deleted file mode 100644 index a73fed85..00000000 --- a/versioned_docs/version-4.2/index.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: HarperDB Docs ---- - -# HarperDB Docs - -HarperDB is a globally-distributed edge application platform. It reduces complexity, increases performance, and lowers costs by combining user-defined applications, a high-performance database, and an enterprise-grade streaming broker into a single package. The platform offers unlimited horizontal scale at the click of a button, and syncs data across the cluster in milliseconds. HarperDB simplifies the process of delivering applications and the data that drives them to the edge, which dramatically improves both the user experience and total cost of ownership for large-scale applications. Deploying HarperDB on global infrastructure enables a CDN-like solution for enterprise data and applications. - -HarperDB's documentation covers installation, getting started, administrative operation APIs, security, and much more. Browse the topics at left, or choose one of the commonly used documentation sections below. - -:::info -Wondering what's new with HarperDB 4.2? Take a look at our latest [Release Notes](/release-notes/v4-tucker/4.2.0). -::: - -## Getting Started - -
-
-

- - Getting Started Guide - -

-

- Get up and running with HarperDB -

-
-
-

- - Quick Install HarperDB - -

-

- Run HarperDB on your own hardware -

-
-
-

- - Try HarperDB Cloud - -

-

- Spin up an instance in minutes to get going fast -

-
-
- -## Building with HarperDB - -
-
-

- - HarperDB Applications - -

-

- Build a fully featured HarperDB Component with custom functionality -

-
-
-

- - REST Queries - -

-

- The recommended HTTP interface for data access, querying, and manipulation -

-
-
-

- - Operations API - -

-

- Configure, deploy, administer, and control your HarperDB instance -

-
-
- -
-
-

- - Clustering & Replication - -

-

- The process of connecting multiple HarperDB databases together to create a database mesh network that enables users to define data replication patterns. -

-
-
-

- - Explore the HarperDB Studio - -

-

- The web-based GUI for HarperDB. Studio enables you to administer, navigate, and monitor all of your HarperDB instances in a simple, user friendly interface. -

-
-
diff --git a/versioned_docs/version-4.2/reference/_category_.json b/versioned_docs/version-4.2/reference/_category_.json deleted file mode 100644 index d6302ac2..00000000 --- a/versioned_docs/version-4.2/reference/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Reference", - "position": 4, - "link": { - "type": "generated-index", - "title": "Reference Documentation", - "description": "Reference documentation and technical specifications", - "keywords": ["reference", "specifications"] - } -} diff --git a/versioned_docs/version-4.2/reference/analytics.md b/versioned_docs/version-4.2/reference/analytics.md deleted file mode 100644 index 314dcd94..00000000 --- a/versioned_docs/version-4.2/reference/analytics.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -HarperDB provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -HarperDB collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the HarperDB analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. 
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -The following are general resource usage statistics that are tracked: - -- `memory` - This includes RSS, heap, buffer and external data usage. -- `utilization` - How much of the time the worker was processing requests. -- mqtt-connections - The number of MQTT connections. - -The following types of information is tracked for each HTTP request: - -- `success` - How many requests returned a successful response (20x response code). TTFB - Time to first byte in the response to the client. -- `transfer` - Time to finish the transfer of the data to the client. -- bytes-sent - How many bytes of data were sent to the client. - -Requests are categorized by operation name, for the operations API, by the resource (name) with the REST API, and by command for the MQTT interface. diff --git a/versioned_docs/version-4.2/reference/architecture.md b/versioned_docs/version-4.2/reference/architecture.md deleted file mode 100644 index 5bbb1e47..00000000 --- a/versioned_docs/version-4.2/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -HarperDB's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. 
- -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.2/reference/clustering/certificate-management.md b/versioned_docs/version-4.2/reference/clustering/certificate-management.md deleted file mode 100644 index e77a9a1c..00000000 --- a/versioned_docs/version-4.2/reference/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box HarperDB generates certificates that are used when HarperDB nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the HarperDB node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that HarperDB generates are stored in your `/keys/`. 
- -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your HarperDB cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make HarperDB start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart HarperDB. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other HarperDB nodes and to make requests to other HarperDB nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart HarperDB. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. 
Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.2/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.2/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 7865ae1c..00000000 --- a/versioned_docs/version-4.2/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via HarperDB users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, HarperDB must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.2/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.2/reference/clustering/enabling-clustering.md deleted file mode 100644 index 596665d9..00000000 --- a/versioned_docs/version-4.2/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. 
Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install HarperDB**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.2/reference/clustering/establishing-routes.md b/versioned_docs/version-4.2/reference/clustering/establishing-routes.md deleted file mode 100644 index abe415a5..00000000 --- a/versioned_docs/version-4.2/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the HarperDB configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.2/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. 
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.2/reference/clustering/index.md b/versioned_docs/version-4.2/reference/clustering/index.md deleted file mode 100644 index 92fe00fe..00000000 --- a/versioned_docs/version-4.2/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -HarperDB clustering is the process of connecting multiple HarperDB databases together to create a database mesh network that enables users to define data replication patterns. - -HarperDB’s clustering engine replicates data between instances of HarperDB using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. -- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. 
- -HarperDB simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting HarperDB focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to HarperDB, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.2/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.2/reference/clustering/managing-subscriptions.md deleted file mode 100644 index bee50508..00000000 --- a/versioned_docs/version-4.2/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: Managing subscriptions ---- - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The schema and tables in the subscription must exist on either the local or the remote node. Any schema and tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `add_node`. 
- -```json -{ - "operation": "add_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "schema": "dev", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node use `update_node`. - -```json -{ - "operation": "update_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. - -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "schema": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "schema": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. 
- -```json -{ - "operation": "add_node", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. - -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.2/reference/clustering/naming-a-node.md b/versioned_docs/version-4.2/reference/clustering/naming-a-node.md deleted file mode 100644 index 308aef7a..00000000 --- a/versioned_docs/version-4.2/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. 
- -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.2/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.2/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 1e2dd6af..00000000 --- a/versioned_docs/version-4.2/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of HarperDB running. - -\*_A node is a single instance/installation of HarperDB. A node of HarperDB can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a HarperDB cluster. 
diff --git a/versioned_docs/version-4.2/reference/clustering/subscription-overview.md b/versioned_docs/version-4.2/reference/clustering/subscription-overview.md deleted file mode 100644 index 63246c4f..00000000 --- a/versioned_docs/version-4.2/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching schema name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`schema` - the name of the schema that the table you are creating the subscription for belongs to. - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.2/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.2/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. 
- -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.2/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.2/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. diff --git a/versioned_docs/version-4.2/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.2/reference/clustering/things-worth-knowing.md deleted file mode 100644 index d737e01f..00000000 --- a/versioned_docs/version-4.2/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any schemas and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive schema operations do not replicate across a cluster**. Those operations include `drop_schema`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop schema information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -HarperDB has built-in resiliency for when network connectivity is lost within a subscription. 
When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -HarperDB clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. Subscription topologies can be as simple or as complex as needed. - -![](/img/v4.2/clustering/figure6.png) diff --git a/versioned_docs/version-4.2/reference/content-types.md b/versioned_docs/version-4.2/reference/content-types.md deleted file mode 100644 index 6aee4850..00000000 --- a/versioned_docs/version-4.2/reference/content-types.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -HarperDB supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. HarperDB follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard HarperDB operations. - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by HarperDB, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with HarperDB. CBOR supports the full range of HarperDB data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and HarperDB's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all HarperDB data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with HarperDB's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. diff --git a/versioned_docs/version-4.2/reference/data-types.md b/versioned_docs/version-4.2/reference/data-types.md deleted file mode 100644 index fca44b40..00000000 --- a/versioned_docs/version-4.2/reference/data-types.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -HarperDB supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (HarperDB’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. As of v4.1, HarperDB supports MessagePack and CBOR, which allows for all of HarperDB supported data types. This includes: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. - -## Number - -Numbers can be stored as signed integers up to 64-bit or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. JSON is parsed by JS, so the maximum safe (precise) integer is 9007199254740991 (larger numbers can be stored, but aren’t guaranteed integer precision). Custom Functions may use BigInt numbers to store/access larger 64-bit integers, but integers beyond 64-bit can’t be stored with integer precision (will be stored as standard double-precision numbers). 
The GraphQL schema type name is `Float` (`Int` can also be used to describe numbers that should fit into signed 32-bit integers). - -## Object/Map - -Objects, or maps, that hold a set of named properties can be stored in HarperDB. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in HarperDB’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in HarperDB. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in HarperDB property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. - -## Binary Data - -Binary data can be stored in property values as well. JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in HarperDB as well. This can’t be represented with JSON, but can be with CBOR. 
diff --git a/versioned_docs/version-4.2/reference/dynamic-schema.md b/versioned_docs/version-4.2/reference/dynamic-schema.md deleted file mode 100644 index ea6ac8cc..00000000 --- a/versioned_docs/version-4.2/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. HarperDB tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -HarperDB databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -HarperDB tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in HarperDB operations API. 
- -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [HarperDB Storage Algorithm](storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to HarperDB. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. HarperDB offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. - -**Audit Attributes** - -HarperDB automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [HarperDB API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. 
- -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.2/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.2/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.2/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. 
Our table now looks like this: - -**dev.dog** - -![](/img/v4.2/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.2/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.2/reference/globals.md b/versioned_docs/version-4.2/reference/globals.md deleted file mode 100644 index 535dfddf..00000000 --- a/versioned_docs/version-4.2/reference/globals.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with HarperDB is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that HarperDB provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -### `tables` - -This is an object that holds all the tables for the default database (called `data`) as properties. 
Each of these property values is a table class that subclasses the Resource interface and provides access to the table through the Resource interface. For example, you can get a record from a table (in the default database) called 'my-table' with: - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -async function getRecord() { - let record = await MyTable.get(recordId); -} -``` - -It is recommended that you [define a schema](../getting-started/) for all the tables that are required to exist in your application. This will ensure that the tables exist on the `tables` object. Also note that the property names follow a CamelCase convention for use in JavaScript and in the GraphQL Schemas, but these are translated to snake_case for the actual table names, and converted back to CamelCase when added to the `tables` object. - -### `databases` - -This is an object that holds all the databases in HarperDB, and can be used to explicitly access a table by database name. Each database will be a property on this object, each of these property values will be an object with the set of all tables in that database. The default database, `databases.data` should equal the `tables` export. For example, if you want to access the "dog" table in the "dev" database, you could do so: - -```javascript -import { databases } from 'harperdb'; -const { Dog } = databases.dev; -``` - -### `Resource` - -This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](resource) for more details about implementing a Resource class. - -### `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). 
- -### `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. See the [logging documentation](../administration/logging/standard-logging) for more information. - -### `server` - -This provides a number of functions and objects to interact with the server including: - -#### `server.config` - -This provides access to the HarperDB configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -#### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into HarperDB's analytics. HarperDB efficiently records and tracks these metrics and makes them available through [analytics API](./analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. diff --git a/versioned_docs/version-4.2/reference/headers.md b/versioned_docs/version-4.2/reference/headers.md deleted file mode 100644 index 3ddc8528..00000000 --- a/versioned_docs/version-4.2/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: HarperDB Headers ---- - -# HarperDB Headers - -All HarperDB API responses include headers that are important for interoperability and debugging purposes. 
The following headers are returned with all HarperDB API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.2/reference/index.md b/versioned_docs/version-4.2/reference/index.md deleted file mode 100644 index 762a0831..00000000 --- a/versioned_docs/version-4.2/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for HarperDB. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.2/reference/limits.md b/versioned_docs/version-4.2/reference/limits.md deleted file mode 100644 index 8ea207ba..00000000 --- a/versioned_docs/version-4.2/reference/limits.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: HarperDB Limits ---- - -# HarperDB Limits - -This document outlines limitations of HarperDB. - -## Database Naming Restrictions - -**Case Sensitivity** - -HarperDB database metadata (database names, table names, and attribute/column names) are case sensitive. Meaning databases, tables, and attributes can differ only by the case of their characters. 
- -**Restrictions on Database Metadata Names** - -HarperDB database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -HarperDB limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. diff --git a/versioned_docs/version-4.2/reference/resource.md b/versioned_docs/version-4.2/reference/resource.md deleted file mode 100644 index 6baec084..00000000 --- a/versioned_docs/version-4.2/reference/resource.md +++ /dev/null @@ -1,538 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to model different data resources within HarperDB. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. Therefore there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. 
- -The RESTful HTTP server and other server interfaces will instantiate/load resources to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, they will be executed in transaction and the access checks will be performed before the method is executed. - -Paths (URL, MQTT topics) are mapped to different resource instances. Using a path that does specify an ID like `/MyResource/3492` will be mapped to a Resource instance where the instance's ID will be `3492`, and interactions will use the instance methods like `get()`, `put()`, and `post()`. Using the root path (`/MyResource/`) will map to a Resource instance with an ID of `null`. - -You can create classes that extend Resource to define your own data sources, typically to interface with external data sources (the Resource base class is available as a global variable in the HarperDB JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. For example: - -```javascript -export class MyExternalData extends Resource { - get() { - // fetch data from an external source, using our primary key - this.fetch(this.id); - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a HarperDB data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. 
The `tables` object is a global variable in the HarperDB JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class twice. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in HarperDB (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). 
See the [extensions documentation for more information](../developers/components/writing-extensions). - -### `transaction` - -This provides a function for starting transactions. See the transactions section below for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the [content type extensions documentation](../developers/components/writing-extensions) for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main HarperDB package in your app: - -``` -# you may need to go to your harperdb directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main HarperDB APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. 
For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. - -### `search(query: Query)`: AsyncIterable - -By default this is called by `get(query)` from a collection resource. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object)` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -### `patch(data: object)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. 
`put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?)` - -This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.put(record)`) deletes the record from the table as part of the current transaction. - -### `publish(message)` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data)` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally this provides a generic mechanism for various types of data updates. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.publish(message)`) will set up a listener that will be called for any changes or published messages to this resource. 
- -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `subscriptionRequest` object supports the following properties (all optional): - -- `id` - The primary key of the record (or topic) that you want to subscribe to. If omitted, this will be a subscription to the whole table. -- `isCollection` - If this is enabled and the `id` was included, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', isCollection: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. 
This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user)` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. - -### `allowRead(user)` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. - -### `allowUpdate(user)` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. - -### `allowDelete(user)` - -This is called to determine if the user has permission to delete the current resource. 
This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. 
You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a HarperDB operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](https://api.harperdb.io/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. 
Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. - -The get, put, delete, subscribe, and connect methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -``` -Id = string|number|array -``` - -### `put(record: object, context?: Resource|Context): Promise` - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will fully replace the existing record. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. 
- -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest, context?: Resource|Context): Promise` - -Subscribes to a record/resource. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -``` -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. 
When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The options parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will convert a multi-segment path to multipart id (an array), which facilitates hierarchical id-based data access, and also parses `.property` suffixes for accessing properties and specifying preferred content type in the URL. However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. 
- -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](transactions) for more information on how transactions work in HarperDB. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -- `conditions`: This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object has the following properties: - - `attribute`: Name of the property/attribute to match on. 
- - `value`: The value to match. - - `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `operator`: Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This defaults to `"and"`. -- `limit`: This specifies the limit of the number of records that should be returned from the query. -- `offset`: This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. -- `select`: This specifies the specific properties that should be included in each record that is returned. This can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify an `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). 
- -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can direct access or modify properties through standard property syntax. 
For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra 
logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the \`update()\`\` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these change will be saved - } -} -``` - -If you need to delete a property, you can do with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get "plain" object representation of a resource instance by calling `toJSON`, which will return a simple object with all the properties (whether defined in the schema) as direct normal properties: - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by protocol the handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. 
For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.2/reference/sql-guide/date-functions.md b/versioned_docs/version-4.2/reference/sql-guide/date-functions.md deleted file mode 100644 index 535ac7b6..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,223 +0,0 @@ ---- -title: SQL Date Functions ---- - -# SQL Date Functions - -HarperDB utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. - -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. 
- -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. - -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). 
- -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. 
The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.2/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.2/reference/sql-guide/features-matrix.md deleted file mode 100644 index 85b9257a..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -HarperDB provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. If not, feel free to [add a Feature Request](https://feedback.harperdb.io/). 
- -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.2/reference/sql-guide/functions.md b/versioned_docs/version-4.2/reference/sql-guide/functions.md deleted file mode 100644 index 8d161679..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/functions.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -title: HarperDB SQL Functions ---- - -# HarperDB SQL Functions - -This SQL keywords reference contains the SQL functions available in HarperDB. 
- -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM schema.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. | -| `MAX` | `SELECT MAX(column_name) FROM schema.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM schema.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). - -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. 
| - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. | -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. 
| -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. | -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. | -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. 
| - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. | - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. 
| - -### String - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM schema.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM schema.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. 
| - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | --------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM schema.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM schema.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM schema.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | -------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM schema.table` | Returns only unique values, eliminating duplicate records. | -| `FROM` | `FROM schema.table` | Used to list the schema(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM schema.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM schema.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM schema.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM schema.table WHERE condition` | Extracts records based on a defined condition. 
| - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM schema.table_1 CROSS JOIN schema.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM schema.table_1 FULL OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM schema.table_1 INNER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. | -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM schema.table_1 LEFT OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM schema.table_1 RIGHT OUTER JOIN schema.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. 
| - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ----------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM schema.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM schema.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | -------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM schema.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO schema.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE schema.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. | diff --git a/versioned_docs/version-4.2/reference/sql-guide/index.md b/versioned_docs/version-4.2/reference/sql-guide/index.md deleted file mode 100644 index dc49fe05..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## HarperDB SQL Guide - -The purpose of this guide is to describe the available functionality of HarperDB as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. 
Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -HarperDB adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -HarperDB has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. - -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -HarperDB supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. HarperDB does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -HarperDB supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -HarperDB supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -HarperDB allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.2/reference/sql-guide/json-search.md b/versioned_docs/version-4.2/reference/sql-guide/json-search.md deleted file mode 100644 index d145bf4f..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/json-search.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: SQL JSON Search ---- - -# SQL JSON Search - -HarperDB automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, HarperDB offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. - -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. 
- -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample schema & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.2/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.2/reference/sql-guide/reserved-word.md deleted file mode 100644 index 7add41ff..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -title: HarperDB SQL Reserved Words ---- - -# HarperDB SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a schema, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. - -For Example, for a table called ASSERT in the dev schema, a SQL select on that table would look like: - -``` -SELECT * from dev.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from dev.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS 
-- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git a/versioned_docs/version-4.2/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.2/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index b4014a28..00000000 --- a/versioned_docs/version-4.2/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,415 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -# SQL Geospatial Functions - -HarperDB geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -2. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -3. Note if you are using Postman for you testing. 
Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, schema and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. | - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. 
- -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. - -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). 
- -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between HarperDB’s headquarters and the Washington Monument. - -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. 
- -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. - -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. 
| - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain HarperDB Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find HarperDB Headquarters within all locations within the database. 
- -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. 
Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "HarperDB Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.2/reference/storage-algorithm.md b/versioned_docs/version-4.2/reference/storage-algorithm.md deleted file mode 100644 index 49960c83..00000000 --- a/versioned_docs/version-4.2/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The HarperDB storage algorithm is fundamental to the HarperDB core functionality, enabling the [Dynamic Schema](dynamic-schema) and all other user-facing functionality. HarperDB is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within HarperDB. - -## Query Language Agnostic - -The HarperDB storage algorithm was designed to abstract the data storage from any individual query language. HarperDB currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, HarperDB offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. 
Each HarperDB table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. HarperDB tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [HarperDB Dynamic Schema](dynamic-schema) reflexively creates both the attribute and index as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -HarperDB inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## HarperDB Indexing Example (Single Table) - -![](/img/v4.2/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.2/reference/transactions.md b/versioned_docs/version-4.2/reference/transactions.md deleted file mode 100644 index 984b0a71..00000000 --- a/versioned_docs/version-4.2/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. 
HarperDB provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in HarperDB. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because HarperDB is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. 
However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. - -Transactions can also be explicitly started using the `transaction` global function that is provided in the HarperDB environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). 
- -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.3/administration/_category_.json b/versioned_docs/version-4.3/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.3/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.3/administration/administration.md b/versioned_docs/version-4.3/administration/administration.md deleted file mode 100644 index 5e4086e3..00000000 --- a/versioned_docs/version-4.3/administration/administration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -HarperDB is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own HarperDB servers. 
- -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database HarperDB is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. HarperDB provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](./administration/logging/): HarperDB defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). HarperDB has a [`get_backup`](./developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. HarperDB can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). - -### Horizontal Scaling with Node Cloning - -HarperDB provides rapid horizontal scaling capabilities through [node cloning functionality described here](cloning.md). 
- -### Monitoring - -HarperDB provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](../reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](./developers/operations-api/utilities#system-information). -- Information about the current cluster configuration and status can be found in the [cluster APIs](./developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy to visualize and monitor HarperDB with Grafana. - -### Replication Transaction Logging - -HarperDB utilizes NATS for replication, which maintains a transaction log. See the [transaction log documentation for information on how to query this log](./administration/logging/). diff --git a/versioned_docs/version-4.3/administration/cloning.md b/versioned_docs/version-4.3/administration/cloning.md deleted file mode 100644 index f6a7db21..00000000 --- a/versioned_docs/version-4.3/administration/cloning.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of HarperDB will create a clone of that -instance's config, databases and setup replication. If it is run in a location where there is no existing HarperDB install, -it will, along with cloning, install HarperDB. If it is run in a location where there is another HarperDB instance, it will -only clone config, databases and replication that do not already exist. 
- -Clone node is triggered when HarperDB is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of HarperDB you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. - -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `HDB_LEADER_CLUSTERING_HOST` - _(optional)_ The leader clustering host. This value will be added to the clustering routes on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 HDB_LEADER_CLUSTERING_HOST=node-1.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--HDB_LEADER_CLUSTERING_HOST` - _(optional)_ The leader clustering host. This value will be added to the clustering routes on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --HDB_LEADER_CLUSTERING_HOST node-1.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. If you want to run clone again set this value to `false`. If HarperDB is started with the clone variables -still present and `cloned` is true, HarperDB will just start as normal. 
- -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing harperdb-config.yaml file. - -More can be found in the HarperDB config documentation [here](../deployments/configuration). - -_Note: because node name must be unique, clone will auto-generate one unless one is provided_ - -### Excluding database, components and replication - -To set any specific (optional) clone config, including the exclusion of any database, components or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -clusteringConfig: - publishToLeaderNode: true - subscribeToLeaderNode: true - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -``` - -_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -`clusteringConfig` - Set the replication setup to establish with the other nodes (default is `true` & `true`) and -set any databases or tables that you wish to exclude from clustering. 
- -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. This includes `clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -**Clustering Routes** - -By default, the clone will send a set routes request to the leader node. The default `host` used in this request will be the -host name of the clone operating system. - -To manually set a host use the variable `HDB_CLONE_CLUSTERING_HOST`. - -To disable the setting of the route set `HDB_SET_CLUSTERING_HOST` to `false`. - -### Cloning system database - -HarperDB uses a database called `system` to store operational information. Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. - -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Fully connected clone - -A fully connected topology is when all nodes are replicating (publish and subscribing) with all other nodes. -A fully connected clone maintains this topology with addition of the new node. When a clone is created, -replication is added between the leader and the clone and any nodes the leader is replicating with. For example, -if the leader is replicating with node-a and node-b, the clone will replicate with the leader, node-a and node-b. - -To run clone node with the fully connected option simply pass the environment variable `HDB_FULLY_CONNECTED=true` or CLI variable `--HDB_FULLY_CONNECTED true`. - -### Cloning overtop of an existing HarperDB instance - -Clone node will not overwrite any existing config, database or replication. 
It will write/clone any config, database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set HarperDB config before the clone is created. To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install HarperDB with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. - -### Cloning steps - -Clone node will execute the following steps when run: - -1. Look for an existing HarperDB install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true` clone will skip the clone logic and start HarperDB. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install HarperDB. An instance is classed as a fresh clone if there is no system database. -1. If clustering is enabled on the leader and the `HDB_LEADER_CLUSTERING_HOST` variable is provided, set up replication on all cloned database(s). -1. Clone is complete, start HarperDB. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. 
- -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e HDB_LEADER_CLUSTERING_HOST=1.123.45.6 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. diff --git a/versioned_docs/version-4.3/administration/compact.md b/versioned_docs/version-4.3/administration/compact.md deleted file mode 100644 index 00c27307..00000000 --- a/versioned_docs/version-4.3/administration/compact.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: Compact a database ---- - -# Compact a database - -Database files can grow quickly as you use them, sometimes impeding performance. -HarperDB has multiple compact features that can be used to reduce database file size and potentially improve performance. -The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. - -There are two options that HarperDB offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, -this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, HarperDB is not running when performing this operation. - -This will copy a HarperDB database with compaction. If you wish to use this new database in place of the original, -you will need to move/rename it to the path of the original database. 
- -This command should be run in the [CLI](../deployments/harperdb-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when HarperDB is started. HarperDB will -not start until compact is complete. Under the hood it loops through all non-system databases, -creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database -to where the original one is located and remove any backups. - -Compact on start is initiated by config in harperdb-config.yaml - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/create-account.md b/versioned_docs/version-4.3/administration/harperdb-studio/create-account.md deleted file mode 100644 index 3d146bb6..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [HarperDB Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your HarperDB Cloud Instances. 
For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -2. Review the Privacy Policy and Terms of Service. -3. Click the sign up for free button. -4. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -5. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/enable-mixed-content.md b/versioned_docs/version-4.3/administration/harperdb-studio/enable-mixed-content.md deleted file mode 100644 index 5a198b91..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the HarperDB Studio to HarperDB Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/index.md b/versioned_docs/version-4.3/administration/harperdb-studio/index.md deleted file mode 100644 index 77c090d2..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: HarperDB Studio ---- - -# HarperDB Studio - -HarperDB Studio is the web-based GUI for HarperDB. Studio enables you to administer, navigate, and monitor all of your HarperDB instances in a simple, user-friendly interface without any knowledge of the underlying HarperDB API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - -HarperDB now includes a simplified local Studio that is packaged with all HarperDB installations and served directly from the instance. It can be enabled in the [configuration file](../../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? - -While HarperDB Studio is web based and hosted by us, all database interactions are performed on the HarperDB instance the studio is connected to. The HarperDB Studio loads in your browser, at which point you login to your HarperDB instances. Credentials are stored in your browser cache and are not transmitted back to HarperDB. All database interactions are made via the HarperDB Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -HarperDB Studio enables users to manage both HarperDB Cloud instances and privately hosted instances all from a single UI. All HarperDB instances feature identical behavior whether they are hosted by us or by you. 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/instance-configuration.md b/versioned_docs/version-4.3/administration/harperdb-studio/instance-configuration.md deleted file mode 100644 index afab5107..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/instance-configuration.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -HarperDB instance configuration can be viewed and managed directly through the HarperDB Studio. HarperDB Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click config in the instance control bar. - -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in HarperDB database user_ - -- Created Date (HarperDB Cloud only) - -- Region (HarperDB Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (HarperDB Cloud only) - -- Disk IOPS (HarperDB Cloud only) - -## Update Instance RAM - -HarperDB Cloud instance size and Enterprise instance licenses can be modified with the following instructions. 
This option is only available to Studio organization owners. - -Note: For HarperDB Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if HarperDB Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The HarperDB Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. 
- - - If you do have a credit card associated, you will be presented with the updated billing information. - - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The HarperDB instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -2. The instance will begin deleting immediately. - -## Restart Instance - -The HarperDB Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -2. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/instance-metrics.md b/versioned_docs/version-4.3/administration/harperdb-studio/instance-metrics.md deleted file mode 100644 index bf95850b..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The HarperDB Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [HarperDB logs](../logging/standard-logging), and [HarperDB Cloud alarms](../../deployments/harperdb-cloud/alarms) (if it is a cloud instance). - -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/instances.md b/versioned_docs/version-4.3/administration/harperdb-studio/instances.md deleted file mode 100644 index dd1cbd08..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The HarperDB Studio allows you to administer all of your HarperDB instances in one place. HarperDB currently offers the following instance types: - -- **HarperDB Cloud Instance** Managed installations of HarperDB, what we call [HarperDB Cloud](../../deployments/harperdb-cloud/). -- **5G Wavelength Instance** Managed installations of HarperDB running on the Verizon network through AWS Wavelength, what we call [5G Wavelength Instances](../../deployments/harperdb-cloud/verizon-5g-wavelength-instances). 
_Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any HarperDB installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. HarperDB stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the HarperDB instances using the standard [HarperDB API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. HarperDB Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New HarperDB Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a HarperDB Cloud Instance or a HarperDB 5G Wavelength Instance, click **Create HarperDB Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial HarperDB instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial HarperDB instance super user._ - - 1. 
Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _HarperDB Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ [_More on instance specs_](../../deployments/harperdb-cloud/instance-size-hardware-specs)_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your HarperDB data will reside. Storage is provisioned based on space and IOPS._ [_More on IOPS Impact on Performance_](../../deployments/harperdb-cloud/iops-impact)_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your HarperDB Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -## Register Enterprise Instance - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization for the instance to be created under. -3. Click the **Create New HarperDB Cloud Instance + Register Enterprise Instance** card. -4. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a HarperDB super user that is already configured in your HarperDB installation._ - - 1. Enter Instance Password - - _The password of a HarperDB super user that is already configured in your HarperDB installation._ - - 1. 
Enter Host - - _The host to access the HarperDB instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the HarperDB instance. HarperDB defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _HarperDB instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. The HarperDB Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **HarperDB Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the HarperDB Studio only. This does not uninstall HarperDB from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. 
Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -HarperDB instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. - - _The username of a HarperDB user that is already configured in your HarperDB instance._ - -1. Enter the database password. - - _The password of a HarperDB user that is already configured in your HarperDB instance._ - -1. Click **Log In**. 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/login-password-reset.md b/versioned_docs/version-4.3/administration/harperdb-studio/login-password-reset.md deleted file mode 100644 index 163a6dee..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your HarperDB Studio Account - -To log into your existing HarperDB Studio account: - -1. Navigate to the [HarperDB Studio](https://studio.harperdb.io/). -2. Enter your email address. -3. Enter your password. -4. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the HarperDB Studio password reset page. -2. Enter your email address. -3. Click **send password reset email**. -4. If the account exists, you will receive an email with a temporary password. -5. Navigate back to the HarperDB Studio login page. -6. Enter your email address. -7. Enter your temporary password. -8. Click **sign in**. -9. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -10. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the HarperDB Studio profile page. -2. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -3. Click the **Update Password** button. 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-applications.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-applications.md deleted file mode 100644 index 80507035..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-applications.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[HarperDB Applications](../../developers/applications/) are enabled by default and can be configured further through the HarperDB Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of HarperDB Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your HarperDB Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can make edit individual files of your application directly in the HarperDB Studio. 
- -## Things to Keep in Mind - -To learn more about developing HarperDB Applications, make sure to read through the [Applications](../../developers/applications/) documentation. - -When working with Applications in the HarperDB Studio, by default the editor will restart the HarperDB Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the HarperDB logs, which can be found on the [status page](./instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your HarperDB instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your HarperDB instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [HarperDB HTTP port found in the HarperDB configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instances is set to by navigating to the [instance config page](./instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. 
In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. - -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - HarperDB Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. 
Using the example from the [HarperDB Application Template](https://github.com/HarperDB/application-template/blob/main/routes/index.js), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost/application-template/getAll`. - -## Creating a New Application - -1. From the application page, click the "+ app" button at the top right. -2. Click "+ Create A New Application Using The Default Template". -3. Enter a name for your project, note project names must contain only alphanumeric characters, dashes and underscores. -4. Click OK. -5. Your project will be available in the applications file navigator on the left. Click a file to select a file to edit. - -## Editing an Application - -1. From the applications page, click the file you would like to edit from the file navigator on the left. -2. Edit the file with any changes you'd like. -3. Click "save" at the top right. Note, as mentioned above, when you save a file, the HarperDB Applications server will be restarted immediately. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-charts.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-charts.md deleted file mode 100644 index 672cb776..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-charts.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Manage Charts ---- - -# Manage Charts - -The HarperDB Studio includes a charting feature within an instance. They are generated in real time based on your existing data and automatically refreshed every 15 seconds. Instance charts can be accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **charts** in the instance control bar. 
- -## Creating a New Chart - -Charts are generated based on SQL queries, therefore to build a new chart you first need to build a query. Instructions as follows (starting on the charts page described above): - -1. Click **query** in the instance control bar. -1. Enter the SQL query you would like to generate a chart from. - - _For example, using the dog demo data from the API Docs, we can get the average dog age per owner with the following query: `SELECT AVG(age) as avg_age, owner_name FROM dev.dog GROUP BY owner_name`._ - -1. Click **Execute**. -1. Click **create chart** at the top right of the results table. -1. Configure your chart. - 1. Choose chart type. - - _HarperDB Studio offers many standard charting options like line, bar, etc._ - - 1. Choose a data column. - - _This column will be used to plot the data point. Typically, this is the values being calculated in the `SELECT` statement. Depending on the chart type, you can select multiple data columns to display on a single chart._ - - 1. Depending on the chart type, you will need to select a grouping. - - _This could be labeled as x-axis, label, etc. This will be used to group the data, typically this is what you used in your **GROUP BY** clause._ - - 1. Enter a chart name. - - _Used for identification purposes and will be displayed at the top of the chart._ - - 1. Choose visible to all org users toggle. - - _Leaving this option off will limit chart visibility to just your HarperDB Studio user. Toggling it on will enable all users with this Organization to view this chart._ - - 1. Click **Add Chart**. - 1. The chart will now be visible on the **charts** page. - -The example query above, configured as a bar chart, results in the following chart: - -![Average Age per Owner Example](/img/v4.3/ave-age-per-owner-ex.png) - -## Downloading Charts - -HarperDB Studio charts can be downloaded in SVG, PNG, and CSV format. Instructions as follows (starting on the charts page described above): - -1. 
Identify the chart you would like to export. -1. Click the three bars icon. -1. Select the appropriate download option. -1. The Studio will generate the export and begin downloading immediately. - -## Delete a Chart - -Delete a chart as follows (starting on the charts page described above): - -1. Identify the chart you would like to delete. -1. Click the X icon. -1. Click the **confirm delete chart** button. -1. The chart will be deleted. - -Deleting a chart that is visible to all Organization users will delete it for all users. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-databases-browse-data.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-databases-browse-data.md deleted file mode 100644 index f6a28f8e..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-databases-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Databases / Browse Data ---- - -# Manage Databases / Browse Data - -Manage instance databases/tables and browse data in tabular format with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage databases and tables, add new data, and more. - -## Manage Databases and Tables - -#### Create a Database - -1. Click the plus icon at the top right of the databases section. -2. Enter the database name. -3. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -2. Identify the appropriate database to delete and click the red minus sign in the same row. -3. Click the red check mark to confirm deletion. - -#### Create a Table - -1. 
Select the desired database from the databases section. -2. Click the plus icon at the top right of the tables section. -3. Enter the table name. -4. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -5. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -2. Click the minus icon at the top right of the tables section. -3. Identify the appropriate table to delete and click the red minus sign in the same row. -4. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -2. This expands the search filters. -3. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -2. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 2. Click **Import From URL**. - 3. The CSV will load, and you will be redirected back to browse table data. -3. To upload a CSV file: - 1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 2. Navigate to your desired CSV file and select it. - 3. Click **Insert X Records**, where X is the number of records in your CSV. - 4. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -2. The Studio will pre-populate existing table attributes in JSON format. 
- - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -3. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -4. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -2. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -3. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -2. Click the **delete icon**. -3. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. - -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. 
diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-roles.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-roles.md deleted file mode 100644 index 45030cca..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -HarperDB users and roles can be managed directly through the HarperDB Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the HarperDB Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -2. Enter the role name. - -3. Click the green check mark. - -4. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -5. 
Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -6. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -2. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -3. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be removed if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -2. Identify the appropriate role to delete and click the red minus sign in the same row. - -3. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-users.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-users.md deleted file mode 100644 index 369bd492..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -HarperDB users and roles can be managed directly through the HarperDB Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. 
Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -HarperDB instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -2. Click **Add User**. - -## Edit a User - -HarperDB instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -2. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 2. Click **Update Password**. - -3. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 2. Click **Update Role**. - -4. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 2. Click **Delete User**. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/manage-replication.md b/versioned_docs/version-4.3/administration/harperdb-studio/manage-replication.md deleted file mode 100644 index 56992ef3..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -HarperDB instance clustering and replication can be configured directly through the HarperDB Studio. It is recommended to read through the [clustering documentation](../../reference/clustering) first to gain a strong understanding of HarperDB clustering behavior. - -All clustering configuration is handled through the **replication** page of the HarperDB Studio, accessed with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. 
Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -HarperDB instances do not have clustering configured by default. The HarperDB Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -2. Enter Cluster Password. -3. Review and/or Set Cluster Node Name. -4. Click **Enable Clustering**. - -At this point the Studio will restart your HarperDB Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you are presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. - -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -HarperDB Instances can be clustered together with the following instructions. - -1. 
Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -2. Identify the instance you would like to connect from the **unconnected instances** panel. - -3. Click the plus icon next to the appropriate instance. - -4. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -HarperDB Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -2. Click the minus icon next to the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing a channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -2. For publish, click the toggle switch in the **publish** column. - -3. For subscribe, click the toggle switch in the **subscribe** column. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/organizations.md b/versioned_docs/version-4.3/administration/harperdb-studio/organizations.md deleted file mode 100644 index 5cc373f6..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -HarperDB Studio organizations provide the ability to group HarperDB Cloud Instances. 
Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique HarperDB Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for HarperDB Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the HarperDB Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the **Create a New Organization** card. -3. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your HarperDB Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -4. Click Create Organization. - -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Identify the proper organization card and click the trash can icon. -3. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -4. Click the **Do It** button. 
- -## Manage Users - -HarperDB Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. In the **add user** box, enter the new user’s email address. -5. Click **Add User**. - -Users may or may not already be HarperDB Studio users when adding them to an organization. If the HarperDB Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a HarperDB Studio account, they will receive an email welcoming them to HarperDB Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled as follows: - -1. Navigate to the HarperDB Studio Organizations page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Toggle the **Is Owner** switch to the desired status. - ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their HarperDB Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. 
Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -6. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_HarperDB billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -2. Click **Add Coupon**. -3. The coupon will then be available and displayed in the coupons panel. diff --git a/versioned_docs/version-4.3/administration/harperdb-studio/query-instance-data.md b/versioned_docs/version-4.3/administration/harperdb-studio/query-instance-data.md deleted file mode 100644 index 588f7d4f..00000000 --- a/versioned_docs/version-4.3/administration/harperdb-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the HarperDB Studio with the following instructions: - -1. Navigate to the [HarperDB Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. 
Click **query** in the instance control bar. -5. Enter your SQL query in the SQL query window. -6. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The HarperDB Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.3/administration/jobs.md b/versioned_docs/version-4.3/administration/jobs.md deleted file mode 100644 index c626ed5a..00000000 --- a/versioned_docs/version-4.3/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -HarperDB Jobs are asynchronous tasks performed by the Operations API. 
- -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/utilities#delete-records-before) - -[export_local](../developers/operations-api/utilities#export-local) - -[export_to_s3](../developers/operations-api/utilities#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. - -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. 
- -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.3/administration/logging/audit-logging.md b/versioned_docs/version-4.3/administration/logging/audit-logging.md deleted file mode 100644 index ea7365ec..00000000 --- a/versioned_docs/version-4.3/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging - -### Audit log - -The audit log uses a standard HarperDB table to track transactions. 
For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart HarperDB for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [HarperDB API documentation](../../developers/operations-api/logs). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. One thing of note is that the `read_audit_log` operation gives you the `original_records`. 
- -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.3/administration/logging/index.md b/versioned_docs/version-4.3/administration/logging/index.md deleted file mode 100644 index fa64b5f2..00000000 --- a/versioned_docs/version-4.3/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -HarperDB provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): HarperDB maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): HarperDB uses a standard HarperDB table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. -- [Transaction Logging](logging/transaction-logging): HarperDB stores a verbose history of all transactions logged for specified database tables, including original data records. 
diff --git a/versioned_docs/version-4.3/administration/logging/standard-logging.md b/versioned_docs/version-4.3/administration/logging/standard-logging.md deleted file mode 100644 index 0e56681a..00000000 --- a/versioned_docs/version-4.3/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -HarperDB maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the HarperDB application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. 
- `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, lowering the log level will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -HarperDB clustering utilizes two [Nats](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of HarperDB and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -HarperDB logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. 
- -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the HarperDB API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.3/administration/logging/transaction-logging.md b/versioned_docs/version-4.3/administration/logging/transaction-logging.md deleted file mode 100644 index 0effb1d3..00000000 --- a/versioned_docs/version-4.3/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -HarperDB offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. HarperDB leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. - -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering/). 
- -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.3/deployments/_category_.json b/versioned_docs/version-4.3/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.3/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.3/deployments/configuration.md b/versioned_docs/version-4.3/deployments/configuration.md deleted file mode 100644 index 8edab1b7..00000000 --- a/versioned_docs/version-4.3/deployments/configuration.md +++ /dev/null @@ -1,973 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -HarperDB is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the HarperDB root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase: `operationsApi`. - -To change a configuration value edit the `harperdb-config.yaml` file and save any changes. HarperDB must be restarted for changes to take effect. - -Alternately, configuration can be changed via environment and/or command line variables or via the API. 
To access lower level elements, use underscores to append parent/child elements (when used this way elements are case insensitive): - -``` -- Environment variables: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variables: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Calling `set_configuration` through the API: `operationsApi_network_port: 9925` -``` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install HarperDB overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -HarperDB is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using HarperDB to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using HarperDB behind a proxy server or application server, all the remote ip addresses will be the same and HarperDB will effectively only run on a single thread. 
Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the HarperDB component server uses for HTTPS connections. This requires a valid certificate and key. 
- -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. -This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming HTTP connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. 
- -```yaml -http: - mtls: true -``` - -or - -```yaml -http: - mtls: - required: true - user: user-name -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because HarperDB does have other threads at work), assuming HarperDB is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread -`debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. -`debug.host` - Specify the host interface to listen on -`debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - ---- - -### `clustering` - -The `clustering` section configures the clustering engine, this is used to replicate data between instances of HarperDB. - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. 
-- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the HarperDB mesh network and discovery service. - -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. 
This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. - -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. 
- -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. - -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of HarperDB threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your HarperDB cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. 
- -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this is to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special HarperDB user role type called `cluster_user`. 
- -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local HarperDB Studio, a GUI for HarperDB hosted on the server. A hosted version of the HarperDB Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or HarperDB Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enabled the local studio or not. - -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures HarperDB logging across all HarperDB functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Logging of application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root`). - -In addition, structured logging of data changes are also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enabled table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. 
- -`level` - _Type_: string; _Default_: error - -Control the verbosity of text event logs. - -```yaml -logging: - level: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`. - -`root` - _Type_: string; _Default_: \/log - -The path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: false - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log HarperDB logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in HarperDB. 
- -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any HarperDB servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the HarperDB Operations API.\ -All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section. 
- -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the HarperDB operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the HarperDB operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. 
- -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The HarperDB database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the HarperDB application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. 
- -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: number; _Default_: null - -Path to a compression dictionary file - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. - -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting HarperDB, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/schema` - -The `path` configuration sets where all database files should reside. 
- -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by HarperDB. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. - -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. 
Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. - -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. - -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). 
- -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. -This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority` - -This can define a specific path to use for the certificate authority. By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this. 
- -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. -This configuration should be set before the database and table have been created. -The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables, environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. 
- -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../developers/components/installing) package. This could be a remote git repo, a local folder/file or an NPM package. -HarperDB will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/versioned_docs/version-4.3/deployments/harperdb-cli.md b/versioned_docs/version-4.3/deployments/harperdb-cli.md deleted file mode 100644 index e4bf6f33..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cli.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: HarperDB CLI ---- - -# HarperDB CLI - -The HarperDB command line interface (CLI) is used to administer [self-installed HarperDB instances](./install-harperdb/). 
- -## Installing HarperDB - -To install HarperDB with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, HarperDB installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. - -#### Environment Variables - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -#### Command Line Arguments - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -## Starting HarperDB - -To start HarperDB after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -## Stopping HarperDB - -To stop HarperDB once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -## Restarting HarperDB - -To restart HarperDB once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -## Getting the HarperDB Version - -To check the version of HarperDB that is installed run the following command: - -```bash -harperdb version -``` - ---- - -## Renew self-signed certificates - -To renew the HarperDB generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -## Copy a database with compaction - -To copy a HarperDB database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` 
- ---- - -## Get all available CLI commands - -To display all available HarperDB CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -## Get the status of HarperDB and clustering - -To display the status of the HarperDB process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - ---- - -## Backups - -HarperDB uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that HarperDB maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a HarperDB database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with HarperDB shut down), and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields an unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -# Operations API through the CLI - -Some of the API operations are available through the CLI, this includes most operations that do not require nested parameters. -To call the operation use the following convention: ` =`. -By default, the result will be formatted as YAML, if you would like the result in JSON pass: `json=true`. 
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` diff --git a/versioned_docs/version-4.3/deployments/harperdb-cloud/alarms.md b/versioned_docs/version-4.3/deployments/harperdb-cloud/alarms.md deleted file mode 100644 index 8bf264d9..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -HarperDB Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harperdb-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | ----------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harperdb-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harperdb-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harperdb-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.3/deployments/harperdb-cloud/index.md b/versioned_docs/version-4.3/deployments/harperdb-cloud/index.md deleted file mode 100644 index 1929f79f..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: HarperDB Cloud ---- - -# HarperDB Cloud - -[HarperDB Cloud](https://studio.harperdb.io/) is the easiest way to test drive HarperDB, it’s HarperDB-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. HarperDB Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new [HarperDB Cloud instance in the HarperDB Studio](../administration/harperdb-studio/instances). diff --git a/versioned_docs/version-4.3/deployments/harperdb-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.3/deployments/harperdb-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 676d536d..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While HarperDB Cloud bills by RAM, each instance has other specifications associated with the RAM selection. 
The following table describes each instance size in detail\*. - -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.3/deployments/harperdb-cloud/iops-impact.md b/versioned_docs/version-4.3/deployments/harperdb-cloud/iops-impact.md deleted file mode 100644 index e2591631..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -HarperDB, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running HarperDB. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that HarperDB performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers HarperDB Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## HarperDB Cloud Storage - -HarperDB Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all HarperDB Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for HarperDB Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. 
A reliable method is to estimate about two IOPS for every index, including the primary key itself. So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact HarperDB Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. HarperDB utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. 
A good rule to follow is that any HarperDB operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to HarperDB’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, HarperDB should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index 9c84cefa..00000000 --- a/versioned_docs/version-4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your HarperDB instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -HarperDB on Verizon 5G Wavelength brings HarperDB closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from HarperDB to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -HarperDB 5G Wavelength Instance Specs While HarperDB 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## HarperDB 5G Wavelength Storage - -HarperDB 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of HarperDB, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger HarperDB volume. Learn more about the [impact of IOPS on performance here](./iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.3/deployments/install-harperdb/index.md b/versioned_docs/version-4.3/deployments/install-harperdb/index.md deleted file mode 100644 index 4399c247..00000000 --- a/versioned_docs/version-4.3/deployments/install-harperdb/index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Install HarperDB ---- - -# Install HarperDB - -## Install HarperDB - -This documentation contains information for installing HarperDB locally. Note that if you’d like to get up and running quickly, you can try a [managed instance with HarperDB Cloud](https://studio.harperdb.io/sign-up). HarperDB is a cross-platform database; we recommend Linux for production use, but HarperDB can run on Windows and Mac as well, for development purposes. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. - -HarperDB runs on Node.js, so if you do not have it installed, you need to do that first (if you already have it installed, you can skip to installing HarperDB, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always use the latest minor/patch for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start HarperDB - -Then you can install HarperDB with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -HarperDB will automatically start after installation. 
HarperDB's installation can be configured with numerous options via CLI arguments, for more information visit the [HarperDB Command Line Interface](./harperdb-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harperdb/linux). - -## With Docker - -If you would like to run HarperDB in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a HarperDB container. - -## Offline Install - -If you need to install HarperDB on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -## Installation on Less Common Platforms - -HarperDB comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.3/deployments/install-harperdb/linux.md b/versioned_docs/version-4.3/deployments/install-harperdb/linux.md deleted file mode 100644 index 7187a3c1..00000000 --- a/versioned_docs/version-4.3/deployments/install-harperdb/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install HarperDB. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to HarperDB with sudo privileges exists -1. An additional volume for storing HarperDB files is attached to the Linux instance -1. Traffic to ports 9925 (HarperDB Operations API) 9926 (HarperDB Application Interface) and 9932 (HarperDB Clustering) is permitted - -While you will need to access HarperDB through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. 
- ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start HarperDB - -Here is an example of installing HarperDB with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing HarperDB with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../../deployments/configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start HarperDB on Boot - -HarperDB will automatically start after installation. If you wish HarperDB to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=HarperDB - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [HarperDB Command Line Interface guide](../../deployments/harperdb-cli) and the [HarperDB Configuration File guide](../../deployments/configuration). 
diff --git a/versioned_docs/version-4.3/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.3/deployments/upgrade-hdb-instance.md deleted file mode 100644 index 3fd61cfd..00000000 --- a/versioned_docs/version-4.3/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -title: Upgrade a HarperDB Instance ---- - -# Upgrade a HarperDB Instance - -This document describes best practices for upgrading self-hosted HarperDB instances. HarperDB can be upgraded using a combination of npm and built-in HarperDB upgrade scripts. Whenever upgrading your HarperDB installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted HarperDB instances only. All [HarperDB Cloud instances](./harperdb-cloud/) will be upgraded by the HarperDB Cloud team. - -## Upgrading - -Upgrading HarperDB is a two-step process. First the latest version of HarperDB must be downloaded from npm, then the HarperDB upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of HarperDB using `npm install -g harperdb`. - - Note `-g` should only be used if you installed HarperDB globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - HarperDB will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -HarperDB supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading HarperDB, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that HarperDB is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure HarperDB is not running: - -```bash -harperdb stop -``` - -Uninstall HarperDB. Note, this step is not required, but will clean up old artifacts of HarperDB. We recommend removing all other HarperDB installations to ensure the most recent version is always running. 
- -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install HarperDB globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start HarperDB - -```bash -harperdb start -``` diff --git a/versioned_docs/version-4.3/developers/_category_.json b/versioned_docs/version-4.3/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.3/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.3/developers/applications/caching.md b/versioned_docs/version-4.3/developers/applications/caching.md deleted file mode 100644 index e28a5edf..00000000 --- a/versioned_docs/version-4.3/developers/applications/caching.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: Caching ---- - -# Caching - -HarperDB has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, HarperDB makes an ideal data caching server. HarperDB can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. HarperDB also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, HarperDB is an extremely fast, interoperable, flexible, and customizable caching server. 
- -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](./defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). 
-- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). - -## Define External Data Source - -Next, you need to define the source for your cache. External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the HarperDB environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyTable } = tables; -MyTable.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), HarperDB will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -HarperDB handles waiting for an existing cache resolution to finish and uses its result. 
This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. 
This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d+)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. 
This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). - -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - while (true) { // every second retrieve more data - // get the next data change event from the source - let update = await (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - await new Promise((resolve) => setTimeout(resolve, 1000)); // wait a second before polling again - } - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, HarperDB will only run the subscribe method on one thread. HarperDB is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running on a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in HarperDB. Caching tables also have [subscription capabilities](./caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of HarperDB, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. 
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which should also include comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - async get() { - let post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json(); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note, that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. 
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- HarperDB will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, HarperDB will first check if there is a current request to get the record from the source. If there is, HarperDB will wait for the request to complete and return the record from the cache. - - If not, HarperDB will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, HarperDB will check if the record is stale. If the record is not stale, HarperDB will immediately return the record from the cache. If the record is stale, HarperDB will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This write of the record to the cache will be done in a separate asynchronous/background write-behind transaction, so it does not block the current request, which will return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). 
If this is overridden, the method will be called at this time. - -### Caching Flow with Write-Through - -When writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- HarperDB will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written to the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/versioned_docs/version-4.3/developers/applications/debugging.md b/versioned_docs/version-4.3/developers/applications/debugging.md deleted file mode 100644 index d37d9074..00000000 --- a/versioned_docs/version-4.3/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -HarperDB components and applications run inside the HarperDB process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. 
Debugging can be performed by launching the HarperDB entry script with your IDE, or you can start HarperDB in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run HarperDB in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. - -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use HarperDB's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by HarperDB. This logger can be used to output messages directly to the HarperDB log using standardized logging level functions, described below. The log level can be set in the [HarperDB Configuration File](../../deployments/configuration). - -HarperDB Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. 
- -## Viewing the Log - -The HarperDB Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the [Studio Status page](../../administration/harperdb-studio/instance-metrics). Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the HarperDB log. diff --git a/versioned_docs/version-4.3/developers/applications/define-routes.md b/versioned_docs/version-4.3/developers/applications/define-routes.md deleted file mode 100644 index 401213b7..00000000 --- a/versioned_docs/version-4.3/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -HarperDB's applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with HarperDB's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. 
- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to HarperDB. The same result could have been achieved by hitting the core HarperDB API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against HarperDB, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to HarperDB- the exact same result could have been achieved by hitting the core HarperDB API. 
But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. - -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Define Helpers](#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against HarperDB directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard HarperDB Operations API (for example, `hdbCore.preValidation[1](./req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with HarperDB using the operations API. The `request.body` should contain a standard HarperDB operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. 
- -- **requestWithoutAuthentication** - - Executes a request against HarperDB without any security checks around whether the inbound user is allowed to make this request. For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.3/developers/applications/defining-schemas.md b/versioned_docs/version-4.3/developers/applications/defining-schemas.md deleted file mode 100644 index 9ca97584..00000000 --- a/versioned_docs/version-4.3/developers/applications/defining-schemas.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in HarperDB's using GraphQL schema definitions. Schemas definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. 
The [introduction to applications provides](./) a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allows data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties. For example, some Dog records could also optionally include a `favoriteTrick` property. - -In this page, we will describe the specific directives that HarperDB uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. 
-- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoints, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in HarperDB. A relationship defines how one table relates to another table using a foreign key. Using the `@relationship` directive will define a property as a computed property, which resolves to the an record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. 
-For example, we can define a foreign key that references another table and then define the relationship. Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resource) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... -} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. -For example, we can define on a reciprocal relationship, from the example above, adding a relationship from brand back to product. 
Here we continue to use the `brandId` attribute
- -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity. - -### Field Types - -HarperDB supports the following field types in addition to user defined (object) types: - -- `String`: String/text. -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647). -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992). -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available). -- `BigInt`: Any integer (negative or positive) with less than 300 digits. (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately.) -- `Boolean`: true or false. -- `ID`: A string (but indicates it is not intended to be human readable). -- `Any`: Any primitive, object, or array is allowed. -- `Date`: A Date object. -- `Bytes`: Binary data (as a Buffer or Uint8Array). - -#### Renaming Tables - -It is important to note that HarperDB does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. 
- -### OpenAPI Specification - -_The [OpenAPI Specification](https://spec.openapis.org/oas/v3.1.0) defines a standard, programming language-agnostic interface description for HTTP APIs, -which allows both humans and computers to discover and understand the capabilities of a service without requiring -access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints are configured through a HarperDB GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`. - -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/versioned_docs/version-4.3/developers/applications/example-projects.md b/versioned_docs/version-4.3/developers/applications/example-projects.md deleted file mode 100644 index 1b90c862..00000000 --- a/versioned_docs/version-4.3/developers/applications/example-projects.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Example Projects ---- - -# Example Projects - -**Library of example HarperDB applications and components:** - -- [Authorization in HarperDB using Okta Customer Identity Cloud](https://www.harperdb.io/post/authorization-in-harperdb-using-okta-customer-identity-cloud), by Yitaek Hwang - -- [How to Speed Up your Applications by Caching at the Edge with HarperDB](https://dev.to/doabledanny/how-to-speed-up-your-applications-by-caching-at-the-edge-with-harperdb-3o2l), by Danny Adams - -- [OAuth Authentication in HarperDB using Auth0 & Node.js](https://www.harperdb.io/post/oauth-authentication-in-harperdb-using-auth0-and-node-js), by Lucas Santos - -- [How To Create a CRUD API with Next.js & HarperDB Custom Functions](https://www.harperdb.io/post/create-a-crud-api-w-next-js-harperdb), by Colby Fayock - -- [Build a Dynamic REST API with Custom Functions](https://harperdb.io/blog/build-a-dynamic-rest-api-with-custom-functions/), by Terra Roush - -- [How to use HarperDB Custom Functions to 
Build your Entire Backend](https://dev.to/andrewbaisden/how-to-use-harperdb-custom-functions-to-build-your-entire-backend-a2m), by Andrew Baisden - -- [Using TensorFlowJS & HarperDB Custom Functions for Machine Learning](https://harperdb.io/blog/using-tensorflowjs-harperdb-for-machine-learning/), by Kevin Ashcraft - -- [Build & Deploy a Fitness App with Python & HarperDB](https://www.youtube.com/watch?v=KMkmA4i2FQc), by Patrick Löber - -- [Create a Discord Slash Bot using HarperDB Custom Functions](https://geekysrm.hashnode.dev/discord-slash-bot-with-harperdb-custom-functions), by Soumya Ranjan Mohanty - -- [How I used HarperDB Custom Functions to Build a Web App for my Newsletter](https://blog.hrithwik.me/how-i-used-harperdb-custom-functions-to-build-a-web-app-for-my-newsletter), by Hrithwik Bharadwaj - -- [How I used HarperDB Custom Functions and Recharts to create Dashboard](https://blog.greenroots.info/how-to-create-dashboard-with-harperdb-custom-functions-and-recharts), by Tapas Adhikary - -- [How To Use HarperDB Custom Functions With Your React App](https://dev.to/tyaga001/how-to-use-harperdb-custom-functions-with-your-react-app-2c43), by Ankur Tyagi - -- [Build a Web App Using HarperDB’s Custom Functions](https://www.youtube.com/watch?v=rz6prItVJZU), livestream by Jaxon Repp - -- [How to Web Scrape Using Python, Snscrape & Custom Functions](https://hackernoon.com/how-to-web-scrape-using-python-snscrape-and-harperdb), by Davis David - -- [What’s the Big Deal w/ Custom Functions](https://rss.com/podcasts/harperdb-select-star/278933/), Select\* Podcast diff --git a/versioned_docs/version-4.3/developers/applications/index.md b/versioned_docs/version-4.3/developers/applications/index.md deleted file mode 100644 index a71cf5a8..00000000 --- a/versioned_docs/version-4.3/developers/applications/index.md +++ /dev/null @@ -1,376 +0,0 @@ ---- -title: Applications ---- - -# Applications - -## Overview of HarperDB Applications - -HarperDB is more than a database, it's a 
distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of HarperDB instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that HarperDB provides by building a HarperDB component, a fundamental building-block of the HarperDB ecosystem. - -When working through this guide, we recommend you use the [HarperDB Application Template](https://github.com/HarperDB/application-template) repo as a reference. - -## Understanding the Component Application Architecture - -HarperDB provides several types of components. Any package that is added to HarperDB is called a "component", and components are generally categorized as either "applications", which deliver a set of endpoints for users, or "extensions", which are building blocks for features like authentication, additional protocols, and connectors that can be used by other components. Components can be added to the `hdb/components` directory and will be loaded by HarperDB when it starts. Components that are remotely deployed to HarperDB (through the studio or the operation API) are installed into the hdb/node_modules directory. Using `harperdb run .` or `harperdb dev .` allows us to specifically load a certain application in addition to any that have been manually added to `hdb/components` or installed (in `node\_modules`). 
- -```mermaid -flowchart LR - Client(Client)-->Endpoints - Client(Client)-->HTTP - Client(Client)-->Extensions - subgraph HarperDB - direction TB - Applications(Applications)-- "Schemas" --> Tables[(Tables)] - Applications-->Endpoints[/Custom Endpoints/] - Applications-->Extensions - Endpoints-->Tables - HTTP[/REST/HTTP/]-->Tables - Extensions[/Extensions/]-->Tables - end -``` - -## Getting up and Running - -### Pre-Requisites - -We assume you are running HarperDB version 4.2 or greater, which supports HarperDB Application architecture (in previous versions, this is 'custom functions'). - -### Scaffolding our Application Directory - -Let's create and initialize a new directory for our application. It is recommended that you start by using the [HarperDB application template](https://github.com/HarperDB/application-template). Assuming you have `git` installed, you can create your project directory by cloning: - -```shell -> git clone https://github.com/HarperDB/application-template my-app -> cd my-app -``` - -
- -You can also start with an empty application directory if you'd prefer. - -To create your own application from scratch, you'll may want to initialize it as an npm package with the \`type\` field set to \`module\` in the \`package.json\` so that you can use the EcmaScript module syntax used in this tutorial: - -```shell -> mkdir my-app -> cd my-app -> npm init -y esnext -``` - -
- -
- -If you want to version control your application code, you can adjust the remote URL to your repository. - -Here's an example for a github repo: - -```shell -> git remote set-url origin git@github.com:// -``` - -Locally developing your application and then committing your app to a source control is a great way to manage your code and configuration, and then you can [directly deploy from your repository](#deploying-your-application). - -
- -## Creating our first Table - -The core of a HarperDB application is the database, so let's create a database table! - -A quick and expressive way to define a table is through a [GraphQL Schema](https://graphql.org/learn/schema). Using your editor of choice, edit the file named `schema.graphql` in the root of the application directory, `my-app`, that we created above. To create a table, we will need to add a `type` of `@table` named `Dog` (and you can remove the example table in the template): - -```graphql -type Dog @table { - # properties will go here soon -} -``` - -And then we'll add a primary key named `id` of type `ID`: - -_(Note: A GraphQL schema is a fast method to define tables in HarperDB, but you are by no means required to use GraphQL to query your application, nor should you necessarily do so)_ - -```graphql -type Dog @table { - id: ID @primaryKey -} -``` - -Now we tell HarperDB to run this as an application: - -```shell -> harperdb dev . # tell HarperDB cli to run current directory as an application in dev mode -``` - -HarperDB will now create the `Dog` table and its `id` attribute we just defined. Not only is this an easy way to get create a table, but this schema is included in our application, which will ensure that this table exists wherever we deploy this application (to any HarperDB instance). - -## Adding Attributes to our Table - -Next, let's expand our `Dog` table by adding additional typed attributes for dog `name`, `breed` and `age`. - -```graphql -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} -``` - -This will ensure that new records must have these properties with these types. - -Because we ran `harperdb dev .` earlier (dev mode), HarperDB is now monitoring the contents of our application directory for changes and reloading when they occur. 
This means that once we save our schema file with these new attributes, HarperDB will automatically reload our application, read `my-app/schema.graphql` and update the `Dog` table and attributes we just defined. The dev mode will also ensure that any logging or errors are immediately displayed in the console (rather only in the log file). - -As a NoSQL database, HarperDB supports heterogeneous records (also referred to as documents), so you can freely specify additional properties on any record. If you do want to restrict the records to only defined properties, you can always do that by adding the `sealed` directive: - -```graphql -type Dog @table @sealed { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -If you are using HarperDB Studio, we can now [add JSON-formatted records](../administration/harperdb-studio/manage-databases-browse-data#add-a-record) to this new table in the studio or upload data as [CSV from a local file or URL](../administration/harperdb-studio/manage-databases-browse-data#load-csv-data). A third, more advanced, way to add data to your database is to use the [operations API](./operations-api), which provides full administrative control over your new HarperDB instance and tables. - -## Adding an Endpoint - -Now that we have a running application with a database (with data if you imported any data), let's make this data accessible from a RESTful URL by adding an endpoint. To do this, we simply add the `@export` directive to our `Dog` table: - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -By default the application HTTP server port is `9926` (this can be [configured here](../deployments/configuration#http)), so the local URL would be [http://localhost:9926/Dog/](http://localhost:9926/Dog/) with a full REST API. 
We can PUT or POST data into this table using this new path, and then GET or DELETE from it as well (you can even view data directly from the browser). If you have not added any records yet, we could use a PUT or POST to add a record. PUT is appropriate if you know the id, and POST can be used to assign an id: - -```http -POST /Dog/ -Content-Type: application/json - -{ - "name": "Harper", - "breed": "Labrador", - "age": 3, - "tricks": ["sits"] -} -``` - -With this a record will be created and the auto-assigned id will be available through the `Location` header. If you added a record, you can visit the path `/Dog/` to view that record. Alternately, the curl command `curl http://localhost:9926/Dog/` will achieve the same thing. - -## Authenticating Endpoints - -These endpoints automatically support `Basic`, `Cookie`, and `JWT` authentication methods. See the documentation on [security](./security) for more information on different levels of access. - -By default, HarperDB also automatically authorizes all requests from loopback IP addresses (from the same computer) as the superuser, to make it simple to interact for local development. If you want to test authentication/authorization, or enforce stricter security, you may want to disable the [`authentication.authorizeLocal` setting](../deployments/configuration#authentication). - -### Content Negotiation - -These endpoints support various content types, including `JSON`, `CBOR`, `MessagePack` and `CSV`. Simply include an `Accept` header in your requests with the preferred content type. We recommend `CBOR` as a compact, efficient encoding with rich data types, but `JSON` is familiar and great for web application development, and `CSV` can be useful for exporting data to spreadsheets or other processing. 
- -HarperDB works with other important standard HTTP headers as well, and these endpoints are even capable of caching interaction: - -``` -Authorization: Basic -Accept: application/cbor -If-None-Match: "etag-id" # browsers can automatically provide this -``` - -## Querying - -Querying your application database is straightforward and easy, as tables exported with the `@export` directive are automatically exposed via [REST endpoints](./rest). Simple queries can be crafted through [URL query parameters](https://en.wikipedia.org/wiki/Query_string). - -In order to maintain reasonable query speed on a database as it grows in size, it is critical to select and establish the proper indexes. So, before we add the `@export` declaration to our `Dog` table and begin querying it, let's take a moment to target some table properties for indexing. We'll use `name` and `breed` as indexed table properties on our `Dog` table. All we need to do to accomplish this is tag these properties with the `@indexed` directive: - -```graphql -type Dog @table { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -And finally, we'll add the `@export` directive to expose the table as a RESTful endpoint - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -Now we can start querying. Again, we just simply access the endpoint with query parameters (basic GET requests), like: - -``` -http://localhost:9926/Dog/?name=Harper -http://localhost:9926/Dog/?breed=Labrador -http://localhost:9926/Dog/?breed=Husky&name=Balto&select=id,name,breed -``` - -Congratulations, you now have created a secure database application backend with a table, a well-defined structure, access controls, and a functional REST endpoint with query capabilities! 
See the [REST documentation for more information on HTTP access](./rest) and see the [Schema reference](./applications/defining-schemas) for more options for defining schemas. - -## Deploying your Application - -This guide assumes that you're building a HarperDB application locally. If you have a cloud instance available, you can deploy it by doing the following: - -- Commit and push your application component directory code (i.e., the `my-app` directory) to a Github repo. In this tutorial we started with a clone of the application-template. To commit and push to your own repository, change the origin to your repo: `git remote set-url origin git@github.com:your-account/your-repo.git` -- Go to the applications section of your target cloud instance in the [HarperDB Studio](../administration/harperdb-studio/manage-applications). -- In the left-hand menu of the applications IDE, click 'deploy' and specify a package location reference that follows the [npm package specification](https://docs.npmjs.com/cli/v8/using-npm/package-spec) (i.e., a string like `HarperDB/Application-Template` or a URL like `https://github.com/HarperDB/application-template`, for example, that npm knows how to install). - -You can also deploy your application from your repository by directly using the [`deploy_component` operation](./operations-api/components#deploy-component). - -Once you have deployed your application to a HarperDB cloud instance, you can start scaling your application by adding additional instances in other regions. - -With the help of a global traffic manager/load balancer configured, you can distribute incoming requests to the appropriate server. You can deploy and re-deploy your application to all the nodes in your mesh. - -Now, with an application that you can deploy, update, and re-deploy, you have an application that is horizontally and globally scalable! 
- -## Custom Functionality with JavaScript - -So far we have built an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in HarperDB. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. In HarperDB, data is accessed through our [Resource API](../reference/resource), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). 
To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the HarperDB provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - get(query) { - this.humanAge = 15 + this.age * 5; // silly calculation of human age equivalent - return super.get(query); - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age`, will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. And changing or assigning new properties can be saved or included in the resource as it returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -And next we will use this table in our `get()` method. We will call the new table's (static) `get()` method to retrieve a breed by id. 
To do this correctly, we access the table using our current context by passing in `this` as the second argument. This is important because it ensures that we are accessing the data atomically, in a consistent snapshot across tables. This provides automatic tracking of most recently updated timestamps across resources for caching purposes. This allows for sharing of contextual metadata (like the user who requested the data), and ensures transactional atomicity for any writes (not needed in this get operation, but important for other operations). The resource methods are automatically wrapped with a transaction (will commit/finish when the method completes), and this allows us to fully utilize multiple resources in our current transaction. With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -// resources.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - async get(query) { - let breedDescription = await Breed.get(this.breed, this); - this.breedDescription = breedDescription; - return super.get(query); - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified by the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While the HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. The POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. 
We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - async post(data) { - if (data.action === 'add-trick') this.tricks.push(data.trick); - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data onto the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being updated. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like the `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post` method or `put` method to do this, but we may want to separate the logic so these methods can be called separately without authorization checks. The [Resource API](../reference/resource) defines `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete` methods to easily configure individual capabilities. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - allowUpdate(user) { - return this.owner === user.username; - } -} -``` - -Any methods that are not defined will fall back to HarperDB's default authorization procedure based on users' roles. 
If you are using/extending a table, this is based on HarperDB's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.js -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to root-path URLs (like `/`) to be directly resolved to this resource. - -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get() { - return (await fetch(`https://best-dog-site.com/${this.getId()}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use HarperDB's powerful caching capabilities and set up data sources. - -HarperDB provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../reference/globals) and the [Resource interface](../reference/resource). - -## Configuring Applications/Components - -Every application or component can define their own configuration in a `config.yaml`. 
If you are using the application template, you will have a [default configuration in this config file](https://github.com/HarperDB/application-template/blob/main/config.yaml) (which is the default configuration if no config file is provided). Within the config file, you can configure how different files and resources are loaded and handled. The default configuration file itself is documented with directions. Each entry can specify any `files` that the loader will handle, and can also optionally specify what, if any, URL `path`s it will handle. A path of `/` means that the root URLs are handled by the loader, and a path of `.` indicates that the URLs that start with this application's name are handled. - -This config file allows you to define a location for static files, as well (that are directly delivered as-is for incoming HTTP requests). - -Each configuration entry can have the following properties, in addition to properties that may be specific to the individual component: - -- `files`: This specifies the set of files that should be handled by the component. This is a glob pattern, so a set of files can be specified like "directory/\*\*". -- `path`: This is the URL path that is handled by this component. -- `root`: This specifies the root directory for mapping file paths to the URLs. For example, if you want all the files in `web/**` to be available in the root URL path via the static handler, you could specify a root of `web`, to indicate that the web directory maps to the root URL path. -- `package`: This is used to specify that this component is a third party package, and can be loaded from the specified package reference (which can be an NPM package, Github reference, URL, etc.). - -## Define Fastify Routes - -Exporting a resource will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. HarperDB includes a resource plugin for defining routes with the Fastify web framework. 
Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as HarperDB's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the HarperDB's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, HarperDB will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.3/developers/components/drivers.md b/versioned_docs/version-4.3/developers/components/drivers.md deleted file mode 100644 index 3296031f..00000000 --- a/versioned_docs/version-4.3/developers/components/drivers.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Drivers -description: >- - Industry standard tools to real-time HarperDB data with BI, analytics, - reporting and data visualization technologies. ---- - -# Drivers - -
DriverDocsDownload
Power BIPowerBI DocsWindows
TableauTableau DocsWindows
Mac
Driver JAR
ExcelExcel DocsWindows
JDBCJDBC DocsWindows
Mac
Driver JAR
ODBCODBC DocsWindows
Mac
Linux (RPM)
Linux (DEB)
ADOADO DocsWindows
CmdletsCmdlets DocsWindows
SSISSSIS DocsWindows
diff --git a/versioned_docs/version-4.3/developers/components/google-data-studio.md b/versioned_docs/version-4.3/developers/components/google-data-studio.md deleted file mode 100644 index 4ee8d848..00000000 --- a/versioned_docs/version-4.3/developers/components/google-data-studio.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Google Data Studio ---- - -# Google Data Studio - -[Google Data Studio](https://datastudio.google.com/) is a free collaborative visualization tool which enables users to build configurable charts and tables quickly. The HarperDB Google Data Studio connector seamlessly integrates your HarperDB data with Google Data Studio so you can build custom, real-time data visualizations. - -The HarperDB Google Data Studio Connector is subject to our [Terms of Use](https://harperdb.io/legal/harperdb-cloud-terms-of-service/) and [Privacy Policy](https://harperdb.io/legal/privacy-policy/). - -## Requirements - -The HarperDB database must be accessible through the Internet in order for Google Data Studio servers to access it. The database may be hosted by you or via [HarperDB Cloud](../../deployments/harperdb-cloud/). - -## Get Started - -Get started by selecting the HarperDB connector from the [Google Data Studio Partner Connector Gallery](https://datastudio.google.com/u/0/datasources/create). - -1. Log in to [https://datastudio.google.com/](https://datastudio.google.com/). -1. Add a new Data Source using the HarperDB connector. The current release version can be added as a data source by following this link: [HarperDB Google Data Studio Connector](https://datastudio.google.com/datasources/create?connectorId=AKfycbxBKgF8FI5R42WVxO-QCOq7dmUys0HJrUJMkBQRoGnCasY60_VJeO3BhHJPvdd20-S76g). -1. Authorize the connector to access other servers on your behalf (this allows the connector to contact your database). -1. Enter the Web URL to access your database (preferably with HTTPS), as well as the Basic Auth key you use to access the database. 
Just include the key, not the word "Basic" at the start of it. -1. Check the box for "Secure Connections Only" if you want to always use HTTPS connections for this data source; entering a Web URL that starts with https:// will do the same thing, if you prefer. -1. Check the box for "Allow Bad Certs" if your HarperDB instance does not have a valid SSL certificate. [HarperDB Cloud](../../deployments/harperdb-cloud/) always has valid certificates, and so will never require this to be checked. Instances you set up yourself may require this, if you are using self-signed certs. If you are using [HarperDB Cloud](../../deployments/harperdb-cloud/) or another instance you know should always have valid SSL certificates, do not check this box. -1. Choose your Query Type. This determines what information the configuration will ask for after pressing the Next button. - - Table will ask you for a Schema and a Table to return all fields of using `SELECT *`. - - SQL will ask you for the SQL query you’re using to retrieve fields from the database. You may `JOIN` multiple tables together, and use HarperDB specific SQL functions, along with the usual power SQL grants. -1. When all information is entered correctly, press the Connect button in the top right of the new Data Source view to generate the Schema. You may also want to name the data source at this point. If the connector encounters any errors, a dialog box will tell you what went wrong so you can correct the issue. -1. If there are no errors, you now have a data source you can use in your reports! You may change the types of the generated fields in the Schema view if you need to (for instance, changing a Number field to a specific currency), as well as creating new fields from the report view that do calculations on other fields. - -## Considerations - -- Both Postman and the [HarperDB Studio](../../administration/harperdb-studio/) app have ways to convert a user:password pair to a Basic Auth token. 
Use either to create the token for the connector’s user. - - You may sign out of your current user by going to the instances tab in HarperDB Studio, then clicking on the lock icon at the top-right of a given instance’s box. Click the lock again to sign in as any user. The Basic Auth token will be visible in the Authorization header portion of any code created in the Sample Code tab. -- It’s highly recommended that you create a read-only user role in HarperDB Studio, and create a user with that role for your data sources to use. This prevents that authorization token from being used to alter your database, should someone else ever get ahold of it. -- The RecordCount field is intended for use as a metric, for counting how many instances of a given set of values appear in a report’s data set. -- _Do not attempt to create fields with spaces in their names_ for any data sources! Google Data Studio will crash when attempting to retrieve a field with such a name, producing a System Error instead of a useful chart on your reports. Using CamelCase or snake_case gets around this. diff --git a/versioned_docs/version-4.3/developers/components/index.md b/versioned_docs/version-4.3/developers/components/index.md deleted file mode 100644 index 0d71d76d..00000000 --- a/versioned_docs/version-4.3/developers/components/index.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Components ---- - -# Components - -HarperDB is a highly extensible database application platform with support for a rich variety of composable modular components and components that can be used and combined to build applications and add functionality to existing applications. HarperDB tools, components, and add-ons can be found in a few places: - -- [SDK libraries](components/sdks) are available for connecting to HarperDB from different languages. -- [Drivers](components/drivers) are available for connecting to HarperDB from different products and tools. 
-- [HarperDB-Add-Ons repositories](https://github.com/orgs/HarperDB-Add-Ons/repositories) lists various templates and add-ons for HarperDB. -- [HarperDB repositories](https://github.com/orgs/HarperDB/repositories) include additional tools for HarperDB. -- You can also [search github.com for an ever-growing list of projects that use, or work with, HarperDB](https://github.com/search?q=harperdb&type=repositories) -- [Google Data Studio](components/google-data-studio) is a visualization tool for building charts and tables from HarperDB data. - -## Components - -There are four general categories of components for HarperDB. The most common is applications. Applications are simply a component that delivers complete functionality through an external interface that it defines, and is usually composed of other components. See [our guide to building applications for getting started](../../developers/applications). - -A data source component can implement the Resource API to customize access to a table or provide access to an external data source. External data source components are used to retrieve and access data from other sources. - -The next two are considered extension components. Server protocol extension components provide and define ways for clients to access data and can be used to extend or create new protocols. - -Server resource components implement support for different types of files that can be used as resources in applications. HarperDB includes support for using JavaScript modules and GraphQL Schemas as resources, but resource components may add support for different file types like HTML templates (like JSX), CSV data, and more. 
- -## Server components - -Server components can be easily added and configured by simply adding an entry to your harperdb-config.yaml: - -```yaml -my-server-component: - package: 'HarperDB-Add-Ons/package-name' # this can be any valid github or npm reference - port: 4321 -``` - -## Writing Extension Components - -You can write your own extensions to build new functionality on HarperDB. See the [writing extension components documentation](components/writing-extensions) for more information. diff --git a/versioned_docs/version-4.3/developers/components/installing.md b/versioned_docs/version-4.3/developers/components/installing.md deleted file mode 100644 index c9e935d0..00000000 --- a/versioned_docs/version-4.3/developers/components/installing.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Installing ---- - -# Installing - -Components can be easily added by adding a new top level element to your `harperdb-config.yaml` file. - -The configuration comprises two values: - -- component name - can be anything, as long as it follows valid YAML syntax. -- `package` - a reference to your component. - -```yaml -myComponentName: - package: HarperDB-Add-Ons/package -``` - -Under the hood HarperDB is calling npm install on all components, this means that the package value can be any valid npm reference such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from NPM -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -When HarperDB is run or restarted it checks to see if there are any new or updated components. If there are, it will dynamically create a package.json file in the `rootPath` directory and call `npm install`. 
- -NPM will install all the components in `/node_modules`. - -The package.json file that is created will look something like this. - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -## Installing components using the operations API - -To add a component using the operations API use the `deploy_component` operation. - -```json -{ - "operation": "deploy_component", - "project": "my-cool-component", - "package": "HarperDB-Add-Ons/package/mycc" -} -``` - -Another option is to pass `deploy_component` a base64-encoded string representation of your component as a `.tar` file. HarperDB can generate this via the `package_component` operation. When deploying with a payload, your component will be deployed to your `/components` directory. Any components in this directory will be automatically picked up by HarperDB. - -```json -{ - "operation": "deploy_component", - "project": "my-cool-component", - "payload": "NzY1IAAwMDAwMjQgADAwMDAwMDAwMDAwIDE0NDIwMDQ3...." -} -``` diff --git a/versioned_docs/version-4.3/developers/components/operations.md b/versioned_docs/version-4.3/developers/components/operations.md deleted file mode 100644 index 691ce4bb..00000000 --- a/versioned_docs/version-4.3/developers/components/operations.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Operations ---- - -# Operations - -One way to manage applications and components is through [HarperDB Studio](../../administration/harperdb-studio/). It performs all the necessary operations automatically. 
To get started, navigate to your instance in HarperDB Studio and click the subnav link for "applications". Once configuration is complete, you can manage and deploy applications in minutes. - -HarperDB Studio manages your applications using nine HarperDB operations. You may view these operations within our [API Docs](../operations-api/). A brief overview of each of the operations is below: - -- **components_status** - - Returns the state of the applications server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -- **get_components** - - Returns an array of projects within the applications root project directory. - -- **get_component_file** - - Returns the content of the specified file as text. HarperDB Studio uses this call to render the file content in its built-in code editor. - -- **set_component_file** - - Updates the content of the specified file. HarperDB Studio uses this call to save any changes made through its built-in code editor. - -- **drop_component_file** - - Deletes the specified file. - -- **add_component_project** - - Creates a new project folder in the applications root project directory. It also inserts into the new directory the contents of our applications Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). - -- **drop_component_project** - - Deletes the specified project folder and all of its contents. - -- **package_component_project** - - Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns that string to the user. - -- **deploy_component_project** - - Takes the output of package_component_project, decodes the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the applications root project directory. 
diff --git a/versioned_docs/version-4.3/developers/components/sdks.md b/versioned_docs/version-4.3/developers/components/sdks.md deleted file mode 100644 index 04f87e6f..00000000 --- a/versioned_docs/version-4.3/developers/components/sdks.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: SDKs -description: >- - Software Development Kits available for connecting to HarperDB from different - languages. ---- - -# SDKs - -| SDK/Tool | Description | Installation | -| ------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------- | -| [HarperDB.NET.Client](https://www.nuget.org/packages/HarperDB.NET.Client) | A Dot Net Core client to execute operations against HarperDB | `dotnet add package HarperDB.NET.Client --version 1.1.0` | -| [Websocket Client](https://www.npmjs.com/package/harperdb-websocket-client) | A Javascript client for real-time access to HarperDB transactions | `npm i -s harperdb-websocket-client` | -| [Gatsby HarperDB Source](https://www.npmjs.com/package/gatsby-source-harperdb) | Use HarperDB as the data source for a Gatsby project at the build time | `npm i -s gatsby-source-harperdb` | -| [HarperDB.EntityFrameworkCore](https://www.nuget.org/packages/HarperDB.EntityFrameworkCore) | The HarperDB EntityFrameworkCore Provider Package for .NET 6.0 | `dotnet add package HarperDB.EntityFrameworkCore --version 1.0.0` | -| [Python SDK](https://pypi.org/project/harperdb/) | Python3 implementations of HarperDB API functions with wrappers for an object-oriented interface | `pip3 install harperdb` | -| [HarperDB Flutter SDK](https://github.com/HarperDB/harperdb-sdk-flutter) | A HarperDB SDK for Flutter | `flutter pub add harperdb` | -| [React Hook](https://www.npmjs.com/package/use-harperdb) | A ReactJS Hook for HarperDB | `npm i -s use-harperdb` | -| [Node Red 
Node](https://flows.nodered.org/node/node-red-contrib-harperdb) | Easy drag and drop connections to HarperDB using the Node-Red platform | `npm i -s node-red-contrib-harperdb` | -| [NodeJS SDK](https://www.npmjs.com/package/harperive) | A HarperDB SDK for NodeJS | `npm i -s harperive` | -| [HarperDB Cargo Crate](https://crates.io/crates/harperdb) | A HarperDB SDK for Rust | `Cargo.toml > harperdb = '1.0.0'` | diff --git a/versioned_docs/version-4.3/developers/components/writing-extensions.md b/versioned_docs/version-4.3/developers/components/writing-extensions.md deleted file mode 100644 index 155cfa5f..00000000 --- a/versioned_docs/version-4.3/developers/components/writing-extensions.md +++ /dev/null @@ -1,175 +0,0 @@ ---- -title: Writing Extensions ---- - -# Writing Extensions - -HarperDB is a highly extensible database application platform with support for a rich variety of composable modular components and extensions that can be used and combined to build applications and add functionality to existing applications. Here we describe the different types of components/extensions that can be developed for HarperDB and how to create them. - -There are three general categories of components for HarperDB: - -- **protocol extensions** that provide and define ways for clients to access data -- **resource extensions** that handle and interpret different types of files -- **consumer data sources** that provide a way to access and retrieve data from other sources. - -Server protocol extensions can be used to implement new protocols like MQTT, AMQP, Kafka, or maybe a retro-style Gopher interface. It can also be used to augment existing protocols like HTTP with "middleware" that can add authentication, analytics, or additional content negotiation, or add layer protocols on top of WebSockets. - -Server resource extensions implement support for different types of files that can be used as resources in applications. 
HarperDB includes support for using JavaScript modules and GraphQL Schemas as resources, but resource extensions could be added to support different file types like HTML templates (like JSX), CSV data, and more. - -Consumer data source components are used to retrieve and access data from other sources, and can be very useful if you want to use HarperDB to cache or use data from other databases like MySQL, Postgres, or Oracle, or subscribe to data from messaging brokers (again possibly Kafka, NATS, etc.). - -These are not mutually exclusive, you may build components that fulfill any or all of these roles. - -## Server Extensions - -Server Extensions are implemented as JavaScript packages/modules and interact with HarperDB through a number of possible hooks. A component can be defined as an extension by specifying the extensionModule in the config.yaml: - -```yaml -extensionModule: './entry-module-name.js' -``` - -### Module Initialization - -Once a user has configured an extension, HarperDB will attempt to load the extension package specified by `package` property. Once loaded, there are several functions that the extension module can export, that will be called by HarperDB: - -`export function start(options: { port: number, server: {}})` If defined, this will be called on the initialization of the extension. The provided `server` property object includes a set of additional entry points for utilizing or layering on top of other protocols (and when implementing a new protocol, you can add your own entry points). The most common entry is to provide an HTTP middleware layer. 
This looks like: - -```javascript -export function start(options: { port: number, server: {}}) { - options.server.http(async (request, nextLayer) => { - // we can directly return a response here, or do some processing on the request and delegate to the next layer - let response = await nextLayer(request); - return response; - }); -} -``` - -Here, the `request` object will have the following structure (this is based on Node's request, but augmented to conform to a subset of the [WHATWG Request API](https://developer.mozilla.org/en-US/docs/Web/API/Request)): - -```typescript -interface Request { - method: string; - headers: Headers; // use request.headers.get(headerName) to get header values - body: Stream; - data: any; // deserialized data from the request body -} -``` - -The returned `response` object should have the following structure (again, following a structural subset of the [WHATWG Response API](https://developer.mozilla.org/en-US/docs/Web/API/Response)): - -```typescript -interface Response { - status?: number; - headers?: {}; // an object with header name/values - data?: any; // object/value that will be serialized into the body - body?: Stream; -} -``` - -The `server.http` function also accepts an options argument that supports a `runFirst` flag to indicate that the middleware should go at the top of the stack and be executed prior to other HTTP components. 
-If you were implementing an authentication extension, you could get authentication information from the request and use it to add the `user` property to the request: - -```javascript -export function start(options: { port: number, server: {}, resources: Map}) { - options.server.http((request, nextLayer) => { - let authorization = request.headers.authorization; - if (authorization) { - // get some token for the user and determine the user - // if we want to use harperdb's user database - let user = server.getUser(username, password); - request.user = user; // authenticate user object goes on the request - } - // continue on to the next layer - return nextLayer(request); - }, { runFirst: true }); - // if you needed to add a login resource, could add it as well: - resources.set('/login', LoginResource); -} -``` - -#### Direct Socket Server - -If you were implementing a new protocol, you can directly interact with the sockets and listen for new incoming TCP connections: - -```javascript -export function start(options: { port: number, server: {}}) { - options.server.socket((socket) => { - // called for each incoming socket - }); -}) -``` - -#### WebSockets - -If you were implementing a protocol using WebSockets, you can define a listener for incoming WebSocket connections and indicate the WebSockets (sub)protocol to specifically handle (which will select your listener if the `Sec-WebSocket-Protocol` header matches your protocol): - -```javascript -export function start(options) { - server.ws((socket) => { - // called for each incoming WebSocket - }, Object.assign({ subProtocol: 'my-cool-protocol' }, options)); -}) -``` - -### Resource Handling - -Typically, servers not only communicate with clients, but serve up meaningful data based on the resources within the server. While resource extensions typically handle defining resources, once resources are defined, they can be consumed by server extensions. 
The `resources` argument provides access to the set of all the resources that have been defined. A server can call `resources.getMatch(path)` to get the resource associated with the URL path. - -## Resource Extensions - -Resource extensions allow us to handle different files and make them accessible to servers as resources, following the common [Resource API](../../reference/resource). To implement a resource extension, you export a function called `handleFile`. Users can then configure which files that should be handled by your extension. For example, if we had implemented an EJS handler, it could be configured as: - -```yaml - module: 'ejs-extension', - path: '/templates/*.ejs' -``` - -And in our extension module, we could implement `handleFile`: - -```javascript -export function handleFile?(contents, relative_path, file_path, resources) { - // will be called for each .ejs file. - // We can then add the generate resource: - resources.set(relative_path, GeneratedResource); -} -``` - -We can also implement a handler for directories. This can be useful for implementing a handler for broader frameworks that load their own files, like Next.js or Remix, or a static file handler. HarperDB includes such an extension for fastify's auto-loader that loads a directory of route definitions. This hook looks like: - -```javascript -export function handleDirectory?(relative_path, path, resources) { -} -``` - -Note that these hooks are not mutually exclusive. You can write an extension that implements any or all of these hooks, potentially implementing a custom protocol and file handling. - -## Data Source Components - -Data source components implement the `Resource` interface to provide access to various data sources, which may be other APIs, databases, or local storage. Components that implement this interface can then be used as a source for caching tables, can be accessed as part of endpoint implementations, or even used as endpoints themselves. 
See the [Resource documentation](../../reference/resource) for more information on implementing new resources. - -## Content Type Extensions - -HarperDB uses content negotiation to determine how to deserialize content incoming data from HTTP requests (and any other protocols that support content negotiation) and to serialize data into responses. This negotiation is performed by comparing the `Content-Type` header with registered content type handler to determine how to deserialize content into structured data that is processed and stored, and comparing the `Accept` header with registered content type handlers to determine how to serialize structured data. HarperDB comes with a rich set of content type handlers including JSON, CBOR, MessagePack, CSV, Event-Stream, and more. However, you can also add your own content type handlers by adding new entries (or even replacing existing entries) to the `contentTypes` exported map from the `server` global (or `harperdb` export). This map is keyed by the MIME type, and the value is an object with properties (all optional): - -- `serialize(data): Buffer|Uint8Array|string`: If defined, this will be called with the data structure and should return the data serialized as binary data (NodeJS Buffer or Uint8Array) or a string, for the response. -- `serializeStream(data): ReadableStream`: If defined, this will be called with the data structure and should return the data serialized as a ReadableStream. This is generally necessary for handling asynchronous iteratables. -- `deserialize(Buffer|string): any`: If defined (and deserializeStream is not defined), this will be called with the raw data received from the incoming request and should return the deserialized data structure. This will be called with a string for text MIME types ("text/..."), and a Buffer for all others. 
-- `deserializeStream(ReadableStream): any`: If defined, this will be called with the raw data stream (if there is one) received from the incoming request and should return the deserialized data structure (potentially as an asynchronous iterable). -- `q: number`: This is an indication of this serialization quality between 0 and 1, and if omitted, defaults to 1. It is called "content negotiation" instead of "content demanding" because both client and server may have multiple supported content types, and the server needs to choose the best for both. This is determined by finding the content type (of all supported) with the highest product of client q and server q (1 is a perfect representation of the data, 0 is worst, 0.5 is medium quality). - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` - -## Trusted/Untrusted (Future Plans) - -In the future, extensions may be categorized as trusted or untrusted. For some HarperDB installations, administrators may choose to constrain users to only using trusted extensions for security reasons (such multi-tenancy requirements or added defense in depth). Most installations do not impose such constraints, but this may exist in some situations. - -An extension can be automatically considered trusted if it conforms to the requirements of [Secure EcmaScript](https://www.npmjs.com/package/ses/v/0.7.0) (basically strict mode code that doesn't modify any global objects), and either does not use any other modules, or only uses modules from other trusted extensions/components. An extension can be marked as trusted by review by the HarperDB team as well, but developers should not expect that HarperDB can review all extensions. 
Untrusted extensions can access any other packages/modules, and may have many additional capabilities. diff --git a/versioned_docs/version-4.3/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.3/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index cc4634fc..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created HarperDB will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." -} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. 
- -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.3/developers/operations-api/bulk-operations.md deleted file mode 100644 index b6f6a07f..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into HarperDB - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which HarperDB is running. For example, the path to a CSV on your computer will produce an error if your HarperDB instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running harperdb - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/clustering.md b/versioned_docs/version-4.3/developers/operations-api/clustering.md deleted file mode 100644 index 7843c545..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/clustering.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional HarperDB instance with associated subscriptions. Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing HarperDB instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table -- - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a HarperDB instance and associated subscriptions from the cluster. Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. 
-Learn more about[HarperDB clustering here](../../reference/clustering) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/versioned_docs/version-4.3/developers/operations-api/components.md b/versioned_docs/version-4.3/developers/operations-api/components.md deleted file mode 100644 index 0abc5406..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/components.md +++ /dev/null @@ -1,314 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a predefined template. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website.\ - -If deploying with the `payload` option, HarperDB will decrypt the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory.\ - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registerd packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. In addition to tags, you can also reference branches or commit numbers. 
Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have an installed SSH keys on the server: - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. 
Must be a string - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. 
- -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete - -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a 
component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. 
Defaults to `utf8` - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/custom-functions.md b/versioned_docs/version-4.3/developers/operations-api/custom-functions.md deleted file mode 100644 index 7b483c8a..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,277 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDB Studio uses this call to render the file content in its built-in code editor. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. HarperDB Studio uses this call to save any changes made through its built-in code editor. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. - -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decodes the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. 
Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.3/developers/operations-api/databases-and-tables.md deleted file mode 100644 index cb5aedb8..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,386 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts above 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). 
- -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. - -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. 
The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping - -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. 
If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. 
**The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: HarperDB will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. 
The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with HarperDB not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (HarperDB actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup HarperDB databases). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/index.md b/versioned_docs/version-4.3/developers/operations-api/index.md deleted file mode 100644 index cb83098b..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/index.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling HarperDB. 
To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../../deployments/configuration), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](./security/basic-auth) or [JWT authentication](./security/jwt-auth). For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [Utilities](operations-api/utilities) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/jobs.md b/versioned_docs/version-4.3/developers/operations-api/jobs.md 
deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/jobs.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. - -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/logs.md b/versioned_docs/version-4.3/developers/operations-api/logs.md deleted file mode 100644 index 56142466..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/logs.md +++ /dev/null @@ -1,732 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read HarperDB Log - -Returns log outputs from the primary HarperDB log based on the provided search criteria. Read more about HarperDB logging here: [https://docs.harperdb.io/docs/4.3/administration/logging#read-logs-via-the-api](../../administration/logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. 
Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. By default, will maintain `hdb.log` order - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.3/administration/logging/transaction-logging#read_transaction_log](../../administration/logging/transaction-logging#read_transaction_log). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", 
- "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.3/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. 
Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.3/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values [`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. - - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - 
"operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by username - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.3/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the HarperDB user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { 
- "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the HarperDB configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). Read more about HarperDB transaction logs here: [https://docs.harperdb.io/docs/4.3/administration/logging/transaction-logging#read_audit_log](../../administration/logging/transaction-logging#read_audit_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the HarperDB configuration file to make this request. 
Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.3/developers/operations-api/nosql-operations.md deleted file mode 100644 index a644fea8..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,384 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `search_attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_value` _(required)_ - value you wish to search - wild cards are allowed -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "search_attribute": "owner_name", - "search_value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. 
The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `search_attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_type` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_value` _(required)_ - case-sensitive value you wish to search. If the `search_type` is `between` then use an array of two values to search between (both inclusive) - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "search_attribute": "age", - "search_type": "between", - "search_value": [5, 8] - }, - { - "search_attribute": "weight_lbs", - "search_type": "greater_than", - "search_value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "search_attribute": "adorable", - "search_type": "equals", - "search_value": true - }, - { - "search_attribute": "lovable", - "search_type": "equals", - "search_value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.3/developers/operations-api/quickstart-examples.md 
deleted file mode 100644 index 4d41ab4c..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -HarperDB recommends utilizing [HarperDB Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using on the HarperDB Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in HarperDB are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -HarperDB does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and HarperDB will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -HarperDB supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/registration.md b/versioned_docs/version-4.3/developers/operations-api/registration.md deleted file mode 100644 index 
7812e843..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/registration.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the HarperDB instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Get Fingerprint - -Returns the HarperDB fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -Sets the HarperDB license as generated by HarperDB License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/sql-operations.md b/versioned_docs/version-4.3/developers/operations-api/sql-operations.md deleted file mode 100644 index 4525fe17..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. 
The SELECT statement is used to query data from the database. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/token-authentication.md b/versioned_docs/version-4.3/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. - -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. 
- -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2
UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqkwsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.3/developers/operations-api/users-and-roles.md deleted file mode 100644 index c65c2c0a..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. 
[Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. 
[Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. HarperDB will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your HarperDB instance. If set to false, user will not be able to access your instance of HarperDB. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. 
HarperDB will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your HarperDB instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. [Learn more about HarperDB roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.3/developers/operations-api/utilities.md b/versioned_docs/version-4.3/developers/operations-api/utilities.md deleted file mode 100644 index 15b9b9f9..00000000 --- a/versioned_docs/version-4.3/developers/operations-api/utilities.md +++ /dev/null @@ -1,377 +0,0 @@ ---- -title: Utilities ---- - -# Utilities - -## Restart - -Restarts the HarperDB instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified HarperDB service. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` - ---- - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. - -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Install Node Modules - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must ba an array of custom functions projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` - ---- - -## Set Configuration - -Modifies the HarperDB configuration file parameters. Must follow with a [restart](#restart) or [restart_service](#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the HarperDB configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the HarperDB configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the HarperDB configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "clustering": { - "enabled": true, - "hubServer": { - "cluster": { - "name": "harperdb", - "network": { - "port": 12345, - "routes": null - } - }, - "leafNodes": { - "network": { - "port": 9931 - } - }, - "network": { - "port": 9930 - } - }, - "leafServer": { - "network": { - "port": 9940, - "routes": null - }, - "streams": { - "maxAge": null, - "maxBytes": null, - "maxMsgs": null, - "path": "/Users/hdb/clustering/leaf" - } - }, - "logLevel": "info", - "nodeName": "node1", - "republishMessages": false, - "databaseLevel": false, - "tls": { - "certificate": "/Users/hdb/keys/certificate.pem", - "certificateAuthority": "/Users/hdb/keys/ca.pem", - "privateKey": 
"/Users/hdb/keys/privateKey.pem", - "insecure": true, - "verify": true - }, - "user": "cluster_user" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - "requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "certificate": "/Users/hdb/keys/certificate.pem", - "certificateAuthority": "/Users/hdb/keys/ca.pem", - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` diff --git a/versioned_docs/version-4.3/developers/real-time.md b/versioned_docs/version-4.3/developers/real-time.md deleted file mode 100644 index be29cd6a..00000000 --- a/versioned_docs/version-4.3/developers/real-time.md +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -HarperDB provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. HarperDB supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -HarperDB real-time communication is based around database tables. 
Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a HarperDB application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -HarperDB is a database, not a generic broker, and therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -HarperDB supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in HarperDB is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -HarperDB supports MQTT with its `mqtt` server module and HarperDB supports MQTT over standard TCP sockets or over WebSockets. 
This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). - -#### Capabilities - -HarperDB's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In HarperDB topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". 
- -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -HarperDB supports QoS 0 and 1 for publishing and subscribing. - -HarperDB supports multi-level topics, both for subscribing and publishing. HarperDB also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. HarperDB currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). 
- -### Ordering - -HarperDB is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. HarperDB is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, HarperDB does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because HarperDB prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). - -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and HarperDB will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). 
Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as being the latest and retained message (#2 in this case). - -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of as "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority is to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available.
For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.3/developers/rest.md b/versioned_docs/version-4.3/developers/rest.md deleted file mode 100644 index 2828ea66..00000000 --- a/versioned_docs/version-4.3/developers/rest.md +++ /dev/null @@ -1,391 +0,0 @@ ---- -title: REST ---- - -# REST - -HarperDB 
provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](./applications/) and [defining schemas](./applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](./applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). -- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. 
-- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -## GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -#### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. - -## PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). 
For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`. - -## DELETE - -This can be used to delete a record or records. - -## `DELETE /my-resource/` - -This will delete a record with the given primary key. This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -## `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -## POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are exposed as endpoints, this also can be used to create new records. - -### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -## Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in HarperDB. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it.
For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. For example, if we are looking for a listing date greater than `2017-03-08T09:30:00.000Z` we must escape the colons as `%3A`: - -``` -GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z -``` - -You can also search for attributes that start with a specific string, by using the == comparator and appending a `*` to the attribute value: - -```http -GET /Product/?name==Keyboard* -``` - -Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters, to ensure operators are passed through without manipulation.
- -Here is a full list of the supported FIQL-style operators/comparators: - -- `==`: equal -- `=lt=`: less than -- `=le=`: less than or equal -- `=gt=`: greater than -- `=ge=`: greater than or equal -- `=ne=`, !=: not equal -- `=ct=`: contains the value (for strings) -- `=sw=`, `==*`: starts with the value (for strings) -- `=ew=`: ends with the value (for strings) -- `=`, `===`: strict equality (no type conversion) -- `!==`: strict inequality (no type conversion) - -### Unions - -Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. For example, to return any product a rating of `5` _or_ a `featured` attribute that is `true`, we could write: - -```http -GET /Product/?rating=5|featured=true -``` - -### Grouping of Operators - -Multiple conditions with different operators can be combined with grouping of conditions to indicate the order of operation. Grouping conditions can be done with parenthesis, with standard grouping conventions as used in query and mathematical expressions. For example, a query to find products with a rating of 5 OR a price between 100 and 200 could be written: - -```http -GET /Product/?rating=5|(price=gt=100&price=lt=200) -``` - -Grouping conditions can also be done with square brackets, which function the same as parenthesis for grouping conditions. The advantage of using square brackets is that you can include user provided values that might have parenthesis in them, and use standard URI component encoding functionality, which will safely escape/encode square brackets, but not parenthesis. 
For example, if we were constructing a query for products with a rating of 5 and matching one of a set of user provided tags, a query could be built like: - -```http -GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient] -``` - -And the tags could be safely generated from user inputs in a tag array like: - -```javascript -let url = `/Product/?rating=5&[${tags.map(encodeURIComponent).join('|')}]`; -``` - -More complex queries can be created by further nesting groups: - -```http -GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true] -``` - -## Query Calls - -HarperDB has several special query functions that use "call" syntax. These can be included in the query string as its own query entry (separated from other query conditions with an `&`). These include: - -### `select(properties)` - -This function allows you to specify which properties should be included in the responses. This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. -- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/referenced records. - -To get a list of product names with a category of software: - -```http -GET /Product/?category=software&select(name) -``` - -### `limit(start,end)` or `limit(end)` - -This function specifies a limit on the number of records returned, optionally providing a starting offset.
- -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -### `sort(property)`, `sort(+property,-property,...)` - -This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If the multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties. - -For example, to sort by product name (in ascending order): - -```http -GET /Product?rating=gt=3&sort(+name) -``` - -To sort by rating in ascending order, then by price in descending order for products with the same rating: - -```http -GET /Product?sort(+rating,-price) -``` - -# Relationships - -HarperDB supports relationships in its data models, allowing for tables to define a relationship with data from other tables (or even itself) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even with ordered relationships). These relationships are defined in the schema, and then can easily be queried through chained attributes that act as "join" queries, allowing related attributes to referenced in conditions and selected for returned results. - -## Chained Attributes and Joins - -To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. 
Most importantly, this provides HarperDB's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema: - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - brandId: ID @indexed - brand: Brand @relationship(from: "brandId") -} -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: "brandId") -} -``` - -And then you could query a product by brand name: - -```http -GET /Product/?brand.name=Microsoft -``` - -This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`. - -The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that: - -```http -GET /Brand/?products.name=Keyboard -``` - -This would return any `Brand` with at least one product with a name `"Keyboard"`. Note, that both of these queries are effectively acting as an "INNER JOIN". - -### Chained/Nested Select - -Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand) -``` - -We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand{name}) -``` - -Or to specify multiple sub-attributes, we can comma delimit them. 
Note that selects can "join" to another table without any constraint/filter on the related/joined table: - -```http -GET /Product/?name=Keyboard&select(name,brand{name,id}) -``` - -When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand. - -### Many-to-many Relationships (Array of Foreign Keys) - -Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, and each reseller can sell multiple products) - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - resellerIds: [ID] @indexed - resellers: [Reseller] @relationship(from: "resellerIds") -} -type Reseller @table { - id: ID @primaryKey - name: String - ... -} -``` - -The product record can then hold an array of the reseller ids. When the `resellers` property is accessed (either through code or through select, conditions), the array of ids is resolved to an array of reseller records. We can also query through the resellers relationships like with the other relationships. For example, to query the products that are available through the "Cool Shop": - -```http -GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id}) -``` - -One of the benefits of using an array of foreign key values is that this can be manipulated using standard array methods (in JavaScript), and the array can dictate an order to keys and therefore to the resulting records.
For example, you may wish to define a specific order to the resellers and how they are listed (which comes first, last): - -```http -PUT /Product/123 -Content-Type: application/json - -{ "id": "123", "resellerIds": ["first-reseller-id", "second-reseller-id", "last-reseller-id"], -...} -``` - -### Type Conversion - -Query parameters are simply text, so there are several features for converting parameter values to properly typed values for performing correct searches. For the FIQL comparators, which includes `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, `=le=`, the parser will perform type conversion, according to the following rules: - -- `name==null`: Will convert the value to `null` for searching. -- `name==123`: Will convert the value to a number _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion.
- -### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, HarperDB supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. - -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. 
If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", "data": "BEGIN:VCALENDAR\nVERSION:2.0\n..." } -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. This is also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/versioned_docs/version-4.3/developers/security/basic-auth.md b/versioned_docs/version-4.3/developers/security/basic-auth.md deleted file mode 100644 index 0b73f479..00000000 --- a/versioned_docs/version-4.3/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -HarperDB uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -**_You do not need to log in separately.
Basic Auth is added to each HTTP request like create_database, create_table, insert etc… via headers._** - -A header is added to each HTTP request. The header key is `Authorization`, the header value is `Basic <base64 encoded username:password>`. - -## Authentication in HarperDB Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for HarperDB. - -_Note: This function uses btoa. Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.3/developers/security/certificate-management.md b/versioned_docs/version-4.3/developers/security/certificate-management.md deleted file mode 100644 index d669f078..00000000 --- a/versioned_docs/version-4.3/developers/security/certificate-management.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for HarperDB external facing APIs.
For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of HarperDB does not have HTTPS enabled (see [configuration](../../deployments/configuration) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart HarperDB. - -By default HarperDB will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your HarperDB node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your HarperDB node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable HarperDB HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart HarperDB. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the HarperDB configuration with the path of your new certificate files, and then restart HarperDB. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set HarperDB will default to the values in the `tls` section. 
- -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for HarperDB, Nginx can be used as a reverse proxy for HarperDB. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to HarperDB as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for HarperDB, a number of different external services can be used as a reverse proxy for HarperDB. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to HarperDB as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for HarperDB administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.3/developers/security/configuration.md b/versioned_docs/version-4.3/developers/security/configuration.md deleted file mode 100644 index c8134aac..00000000 --- a/versioned_docs/version-4.3/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -HarperDB was set up to require very minimal configuration to work out of the box. 
There are, however, some best practices we encourage for anyone building an app with HarperDB. - -## CORS - -HarperDB allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, HarperDB enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harperdb.io,https://products.harperdb.io` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -HarperDB provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose HarperDB's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -HarperDB automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. 
- -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. Use operation `harperdb restart` from HarperDB Operations API.** diff --git a/versioned_docs/version-4.3/developers/security/index.md b/versioned_docs/version-4.3/developers/security/index.md deleted file mode 100644 index 407afb17..00000000 --- a/versioned_docs/version-4.3/developers/security/index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Security ---- - -# Security - -HarperDB uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [mTLS Authentication](security/mtls-auth) -- [Configuration](security/configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.3/developers/security/jwt-auth.md b/versioned_docs/version-4.3/developers/security/jwt-auth.md deleted file mode 100644 index 4b3ea934..00000000 --- a/versioned_docs/version-4.3/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -HarperDB uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all HarperDB operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. 
This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. - -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their HarperDB credentials. The following POST body is sent to HarperDB. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by HarperDB. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.3/developers/security/mtls-auth.md b/versioned_docs/version-4.3/developers/security/mtls-auth.md deleted file mode 100644 index be5cc5a6..00000000 --- a/versioned_docs/version-4.3/developers/security/mtls-auth.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -HarperDB supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. diff --git a/versioned_docs/version-4.3/developers/security/users-and-roles.md b/versioned_docs/version-4.3/developers/security/users-and-roles.md deleted file mode 100644 index e5b155bd..00000000 --- a/versioned_docs/version-4.3/developers/security/users-and-roles.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -HarperDB utilizes a Role-Based Access Control (RBAC) framework to manage access to HarperDB instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in HarperDB - -Role permissions in HarperDB are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a HarperDB instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. HarperDB will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. 
At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within HarperDB. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a HarperDB instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. 
- -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a HarperDB instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. - -Example JSON for `add_role` request - -```jsonc -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true, - }, - ], - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [], - }, - }, - }, - }, -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. 
If permissions are included in the body of the operation, they will be stored within HarperDB, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles, blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). - -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1. 
If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have not CRUD access (with the exception of the `hash_attribute` described in #2). - - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that HarperDB manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. 
- -## Role-Based Operation Restrictions - -The table below includes all API operations available in HarperDB and indicates whether or not the operation is restricted to super_user roles. - -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| 
drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | :-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed HarperDB as `<>`. Because HarperDB stores files natively on the operating system, we only allow the HarperDB executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. 
User_b may not have access to the hdb files HarperDB needs. This also keeps HarperDB more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. diff --git a/versioned_docs/version-4.3/getting-started.md b/versioned_docs/version-4.3/getting-started.md deleted file mode 100644 index e2a238ae..00000000 --- a/versioned_docs/version-4.3/getting-started.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Getting Started ---- - -# Getting Started - -HarperDB is designed for quick and simple setup and deployment, with smart defaults that lead to fast, scalable, and globally distributed database applications. - -You can easily create a HarperDB database in the cloud through our studio or install it locally. The quickest way to get HarperDB up and running is with [HarperDB Cloud](./deployments/harperdb-cloud/), our database-as-a-service offering. However, HarperDB is a [database application platform](./developers/applications/), and to leverage HarperDB’s full application development capabilities of defining schemas, endpoints, messaging, and gateway capabilities, you may wish to install and run HarperDB locally so that you can use your standard local IDE tools, debugging, and version control. - -### Installing a HarperDB Instance - -You can simply install HarperDB with npm (or yarn, or other package managers): - -```shell -npm install -g harperdb -``` - -Here we installed HarperDB globally (and we recommend this) to make it easy to run a single HarperDB instance with multiple projects, but you can install it locally (not globally) as well. - -You can run HarperDB by running: - -```javascript -harperdb; -``` - -You can now use HarperDB as a standalone database. You can also create a cloud instance (see below), which is also an easy way to get started. 
- -#### Developing Database Applications with HarperDB - -HarperDB is more than just a database, with HarperDB you build "database applications" which package your schema, endpoints, and application logic together. You can then deploy your application to an entire cluster of HarperDB instances, ready to scale to on-the-edge delivery of data and application endpoints directly to your users. To get started with HarperDB, take a look at our application development guide, with quick and easy examples: - -[Database application development guide](./developers/applications/) - -### Setting up a Cloud Instance - -To set up a HarperDB cloud instance, simply sign up and create a new instance: - -1. [Sign up for the HarperDB Studio](https://studio.harperdb.io/sign-up) -1. [Create a new HarperDB Cloud instance](./administration/harperdb-studio/instances#create-a-new-instance) - -Note that a local instance and cloud instance are not mutually exclusive. You can register your local instance in the HarperDB Studio, and a common development flow is to develop locally and then deploy your application to your cloud instance. - -HarperDB Cloud instance provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -#### Using the HarperDB Studio - -Now that you have a HarperDB instance, if you want to use HarperDB as a standalone database, you can fully administer and interact with our database through the Studio. This section links to appropriate articles to get you started interacting with your data. - -1. [Create a database](./administration/harperdb-studio/manage-databases-browse-data#create-a-database) -1. [Create a table](./administration/harperdb-studio/manage-databases-browse-data#create-a-table) -1. [Add a record](./administration/harperdb-studio/manage-databases-browse-data#add-a-record) -1. 
[Load CSV data](./administration/harperdb-studio/manage-databases-browse-data#load-csv-data) (Here’s a sample CSV of the HarperDB team’s dogs) -1. [Query data via SQL](./administration/harperdb-studio/query-instance-data) - -## Administering HarperDB - -If you are deploying and administering HarperDB, you may want to look at our [configuration documentation](./deployments/configuration) and our administrative operations API below. - -### HarperDB APIs - -The preferred way to interact with HarperDB for typical querying, accessing, and updating data (CRUD) operations is through the REST interface, described in the [REST documentation](./developers/rest). - -The Operations API provides extensive administrative capabilities for HarperDB, and the [Operations API documentation has usage and examples](./developers/operations-api/). Generally it is recommended that you use the RESTful interface as your primary interface for performant data access, querying, and manipulation (DML) for building production applications (under heavy load), and the operations API (and SQL) for data definition (DDL) and administrative purposes. - -The HarperDB Operations API is single endpoint, which means the only thing that needs to change across different calls is the body. For example purposes, a basic cURL command is shown below to create a database called dev. To change this behavior, swap out the operation in the `data-raw` body parameter. - -``` -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_schema", - "database": "dev" -}' -``` - -## Support and Learning More - -If you find yourself in need of additional support you can submit a [HarperDB support ticket](https://harperdbhelp.zendesk.com/hc/en-us/requests/new). 
You can also learn more about available HarperDB projects by searching [Github](https://github.com/search?q=harperdb). - -### Video Tutorials - -[HarperDB video tutorials are available on our YouTube channel](https://www.youtube.com/@harperdbio). HarperDB and the HarperDB Studio are constantly changing, as such, there may be small discrepancies in UI/UX. diff --git a/versioned_docs/version-4.3/index.md b/versioned_docs/version-4.3/index.md deleted file mode 100644 index 80f6098b..00000000 --- a/versioned_docs/version-4.3/index.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: HarperDB Docs ---- - -# HarperDB Docs - -HarperDB is a globally-distributed edge application platform. It reduces complexity, increases performance, and lowers costs by combining user-defined applications, a high-performance database, and an enterprise-grade streaming broker into a single package. The platform offers unlimited horizontal scale at the click of a button, and syncs data across the cluster in milliseconds. HarperDB simplifies the process of delivering applications and the data that drives them to the edge, which dramatically improves both the user experience and total cost of ownership for large-scale applications. Deploying HarperDB on global infrastructure enables a CDN-like solution for enterprise data and applications. - -HarperDB's documentation covers installation, getting started, administrative operation APIs, security, and much more. Browse the topics at left, or choose one of the commonly used documentation sections below. - -:::info -Wondering what's new with HarperDB 4.3? Take a look at our latest [Release Notes](/release-notes/v4-tucker/4.3.0). -::: - -## Getting Started - -
-
-

- - Getting Started Guide - -

-

- Get up and running with HarperDB -

-
-
-

- - Quick Install HarperDB - -

-

- Run HarperDB on your own hardware -

-
-
-

- - Try HarperDB Cloud - -

-

- Spin up an instance in minutes to get going fast -

-
-
- -## Building with HarperDB - -
-
-

- - HarperDB Applications - -

-

- Build a fully featured HarperDB Component with custom functionality -

-
-
-

- - REST Queries - -

-

- The recommended HTTP interface for data access, querying, and manipulation -

-
-
-

- - Operations API - -

-

- Configure, deploy, administer, and control your HarperDB instance -

-
-
- -
-
-

- - Clustering & Replication - -

-

- The process of connecting multiple HarperDB databases together to create a database mesh network that enables users to define data replication patterns. -

-
-
-

- - Explore the HarperDB Studio - -

-

- The web-based GUI for HarperDB. Studio enables you to administer, navigate, and monitor all of your HarperDB instances in a simple, user-friendly interface. -

-
-
diff --git a/versioned_docs/version-4.3/reference/_category_.json b/versioned_docs/version-4.3/reference/_category_.json deleted file mode 100644 index d6302ac2..00000000 --- a/versioned_docs/version-4.3/reference/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Reference", - "position": 4, - "link": { - "type": "generated-index", - "title": "Reference Documentation", - "description": "Reference documentation and technical specifications", - "keywords": ["reference", "specifications"] - } -} diff --git a/versioned_docs/version-4.3/reference/analytics.md b/versioned_docs/version-4.3/reference/analytics.md deleted file mode 100644 index 314dcd94..00000000 --- a/versioned_docs/version-4.3/reference/analytics.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -HarperDB provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -HarperDB collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the HarperDB analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. 
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -The following are general resource usage statistics that are tracked: - -- `memory` - This includes RSS, heap, buffer and external data usage. -- `utilization` - How much of the time the worker was processing requests. -- mqtt-connections - The number of MQTT connections. - -The following types of information is tracked for each HTTP request: - -- `success` - How many requests returned a successful response (20x response code). TTFB - Time to first byte in the response to the client. -- `transfer` - Time to finish the transfer of the data to the client. -- bytes-sent - How many bytes of data were sent to the client. - -Requests are categorized by operation name, for the operations API, by the resource (name) with the REST API, and by command for the MQTT interface. diff --git a/versioned_docs/version-4.3/reference/architecture.md b/versioned_docs/version-4.3/reference/architecture.md deleted file mode 100644 index 5bbb1e47..00000000 --- a/versioned_docs/version-4.3/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -HarperDB's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. 
- -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.3/reference/clustering/certificate-management.md b/versioned_docs/version-4.3/reference/clustering/certificate-management.md deleted file mode 100644 index e77a9a1c..00000000 --- a/versioned_docs/version-4.3/reference/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box HarperDB generates certificates that are used when HarperDB nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the HarperDB node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that HarperDB generates are stored in your `/keys/`. 
- -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your HarperDB cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make HarperDB start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart HarperDB. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other HarperDB nodes and to make requests to other HarperDB nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart HarperDB. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. 
Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.3/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.3/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 7865ae1c..00000000 --- a/versioned_docs/version-4.3/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via HarperDB users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, HarperDB must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.3/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.3/reference/clustering/enabling-clustering.md deleted file mode 100644 index 596665d9..00000000 --- a/versioned_docs/version-4.3/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. 
Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install HarperDB**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.3/reference/clustering/establishing-routes.md b/versioned_docs/version-4.3/reference/clustering/establishing-routes.md deleted file mode 100644 index d8a725d7..00000000 --- a/versioned_docs/version-4.3/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the HarperDB configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.3/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to HarperDB configuration HarperDB must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. 
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.3/reference/clustering/index.md b/versioned_docs/version-4.3/reference/clustering/index.md deleted file mode 100644 index 92fe00fe..00000000 --- a/versioned_docs/version-4.3/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -HarperDB clustering is the process of connecting multiple HarperDB databases together to create a database mesh network that enables users to define data replication patterns. - -HarperDB’s clustering engine replicates data between instances of HarperDB using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. -- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. 
- -HarperDB simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting HarperDB focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to HarperDB, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.3/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.3/reference/clustering/managing-subscriptions.md deleted file mode 100644 index ebb5b3d2..00000000 --- a/versioned_docs/version-4.3/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: Managing subscriptions ---- - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `set_node_replication`. 
- -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "data", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "database": "data", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/update while the others will be left untouched. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. - -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. 
This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. - -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.3/reference/clustering/naming-a-node.md b/versioned_docs/version-4.3/reference/clustering/naming-a-node.md deleted file mode 100644 index 308aef7a..00000000 --- a/versioned_docs/version-4.3/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. 
It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file HarperDB must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.3/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.3/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 1e2dd6af..00000000 --- a/versioned_docs/version-4.3/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of HarperDB running. - -\*_A node is a single instance/installation of HarperDB. A node of HarperDB can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a HarperDB cluster. 
diff --git a/versioned_docs/version-4.3/reference/clustering/subscription-overview.md b/versioned_docs/version-4.3/reference/clustering/subscription-overview.md deleted file mode 100644 index a7b0f8fa..00000000 --- a/versioned_docs/version-4.3/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. _Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.3/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.3/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. 
- -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.3/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.3/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. diff --git a/versioned_docs/version-4.3/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.3/reference/clustering/things-worth-knowing.md deleted file mode 100644 index 41035b44..00000000 --- a/versioned_docs/version-4.3/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -HarperDB has built-in resiliency for when network connectivity is lost within a subscription. 
When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -HarperDB clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. - -![](/img/v4.3/clustering/figure6.png) diff --git a/versioned_docs/version-4.3/reference/content-types.md b/versioned_docs/version-4.3/reference/content-types.md deleted file mode 100644 index e94887ad..00000000 --- a/versioned_docs/version-4.3/reference/content-types.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -HarperDB supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. HarperDB follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard HarperDB operations. - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by HarperDB, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with HarperDB. CBOR supports the full range of HarperDB data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and HarperDB's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all HarperDB data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with HarperDB's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/versioned_docs/version-4.3/reference/data-types.md b/versioned_docs/version-4.3/reference/data-types.md deleted file mode 100644 index 3526a830..00000000 --- a/versioned_docs/version-4.3/reference/data-types.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -HarperDB supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (HarperDB’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. HarperDB supports MessagePack and CBOR, which allows for all of HarperDB supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and is used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. 
- -## Number - -Numbers can be stored as signed integers up to a 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type name are supported: - -- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double") -- `Int` - Any integer between from -2147483648 to 2147483647 -- `Long` - Any integer between from -9007199254740992 to 9007199254740992 -- `BigInt` - Any integer (negative or positive) with less than 300 digits - -Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately. - -## Object/Map - -Objects, or maps, that hold a set named properties can be stored in HarperDB. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in HarperDB’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in HarperDB. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in HarperDB property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. - -## Binary Data - -Binary data can be stored in property values as well. 
JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in HarperDB as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/versioned_docs/version-4.3/reference/dynamic-schema.md b/versioned_docs/version-4.3/reference/dynamic-schema.md deleted file mode 100644 index 02b26b03..00000000 --- a/versioned_docs/version-4.3/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. HarperDB tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -HarperDB databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. 
By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -HarperDB tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in HarperDB operations API. - -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [HarperDB Storage Algorithm](./storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to HarperDB. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. HarperDB offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. - -**Audit Attributes** - -HarperDB automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. 
-- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [HarperDB API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. - -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.3/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.3/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. 
Our table now looks like this: - -**dev.dog** - -![](/img/v4.3/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.3/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.3/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.3/reference/globals.md b/versioned_docs/version-4.3/reference/globals.md deleted file mode 100644 index 2d5f7d10..00000000 --- a/versioned_docs/version-4.3/reference/globals.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with HarperDB is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that HarperDB provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. 
Once linked, if you are using EcmaScript module syntax you can import functions from `harperdb` like:
This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](./resource) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP service. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -#### `Request` - -An implementation of WHATWG [Request](https://developer.mozilla.org/en-US/docs/Web/API/Request) class. - -#### `Response` - -An implementation of WHATWG [Response](https://developer.mozilla.org/en-US/docs/Web/API/Response) class. - -#### `HttpOptions` - -Type: `Object` - -Properties: - - - - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. 
Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. 
Use the [`server.upgrade()`](#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. - -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. -- The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -- The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - - - - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the HarperDB Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. 
Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the HarperDB configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into HarperDB's analytics. HarperDB efficiently records and tracks these metrics and makes them available through [analytics API](./analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. 
diff --git a/versioned_docs/version-4.3/reference/headers.md b/versioned_docs/version-4.3/reference/headers.md deleted file mode 100644 index 3ddc8528..00000000 --- a/versioned_docs/version-4.3/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: HarperDB Headers ---- - -# HarperDB Headers - -All HarperDB API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all HarperDB API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.3/reference/index.md b/versioned_docs/version-4.3/reference/index.md deleted file mode 100644 index 762a0831..00000000 --- a/versioned_docs/version-4.3/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for HarperDB. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.3/reference/limits.md b/versioned_docs/version-4.3/reference/limits.md deleted file mode 100644 index 8ea207ba..00000000 --- a/versioned_docs/version-4.3/reference/limits.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: HarperDB Limits ---- - -# HarperDB Limits - -This document outlines limitations of HarperDB. 
- -## Database Naming Restrictions - -**Case Sensitivity** - -HarperDB database metadata (database names, table names, and attribute/column names) are case sensitive. Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -HarperDB database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -HarperDB limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. diff --git a/versioned_docs/version-4.3/reference/resource.md b/versioned_docs/version-4.3/reference/resource.md deleted file mode 100644 index 791d41a8..00000000 --- a/versioned_docs/version-4.3/reference/resource.md +++ /dev/null @@ -1,697 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within HarperDB. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. 
Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information to during execution. Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. This general rule for how to interact with resources: - -- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initial access or write data. For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. - - You can subsequently use the instance methods on the returned resource instance to perform additional actions on the record. -- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. - -The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. - -The REST-based API is a little different than traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind, but semantics that attempt to guarantee no existing record or overwrite-only behavior require locks that don't scale well in distributed database. 
Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed eventually consistent database. You can generally think of CRUD operations mapping to REST operations like this: - -- Read - `get` -- Create with a known primary key - `put` -- Create with a generated primary key - `post`/`create` -- Update (Full) - `put` -- Update (Partial) - `patch` -- Delete - `delete` - -The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. Using a path that specifies an ID like `/MyResource/3492` will be mapped to a Resource instance where the instance's ID will be `3492`, and interactions will use the instance methods like `get()`, `put()`, and `post()`. Using the root path (`/MyResource/`) will map to a Resource instance with an ID of `null`, and this represents the collection of all the records in the resource or table. - -You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the HarperDB JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. 
For example: - -```javascript -export class MyExternalData extends Resource { - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a HarperDB data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the HarperDB JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data").
Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in HarperDB (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../developers/components/writing-extensions). - -### `transaction` - -This provides a function for starting transactions. See the transactions section below for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the [content type extensions documentation](../developers/components/writing-extensions) for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main HarperDB package in your app: - -``` -# you may need to go to your harperdb directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main HarperDB APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. 
- -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make sure you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query)` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. 
Therefore, calling `super.put(data)` updates this specific record/resource, not other records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?)` - -This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete()`) deletes the record from the table as part of the current transaction. 
- -### `publish(message)` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query)` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). 
- -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. 
During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user)` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. - -### `allowRead(user)` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. - -### `allowUpdate(user)` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. - -### `allowDelete(user)` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. - -### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. 
- -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. 
-- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a HarperDB operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). 
- -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. 
For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. -Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. 
This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. 
- -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. 
By default, this will convert a multi-segment path to multipart id (an array), which facilitates hierarchical id-based data access, and also parses `.property` suffixes for accessing properties and specifying preferred content type in the URL. However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an exported resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. 
We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in HarperDB. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). 
- For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -##### Chained Attributes/Properties - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'HarperDB' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. -An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced joined table. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`. 
- -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify a `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. 
For example to just return an iterator of the `id`s of object: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as HarperDB will execute them. HarperDB will estimate the number of the matching records for each condition and apply the narrowest condition applied first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. 
Failing to iterate the results will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can directly access or modify properties through standard property syntax. For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any of these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property 
access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the \`update()\`\` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example, we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these changes will be saved - } -} -``` - -If you need to delete a property, you can do so with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get a "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. 
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.3/reference/sql-guide/date-functions.md b/versioned_docs/version-4.3/reference/sql-guide/date-functions.md deleted file mode 100644 index 6829aef1..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -HarperDB utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. 
This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. - -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. - -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.3/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.3/reference/sql-guide/features-matrix.md deleted file mode 100644 index 6f2dd460..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: SQL Features Matrix ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Features Matrix - -HarperDB provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. If not, feel free to [add a Feature Request](https://feedback.harperdb.io/). - -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ 
| -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.3/reference/sql-guide/functions.md b/versioned_docs/version-4.3/reference/sql-guide/functions.md deleted file mode 100644 index ad4405c6..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: HarperDB SQL Functions ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# HarperDB SQL Functions - -This SQL keywords reference contains the SQL functions available in HarperDB. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. 
| -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). - -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. 
| -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. 
| -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. | -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. 
| - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. | - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. 
If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. | - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. 
| -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. | - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. 
| -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. | - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. 
| diff --git a/versioned_docs/version-4.3/reference/sql-guide/index.md b/versioned_docs/version-4.3/reference/sql-guide/index.md deleted file mode 100644 index dc49fe05..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## HarperDB SQL Guide - -The purpose of this guide is to describe the available functionality of HarperDB as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -HarperDB adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -HarperDB has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. 
- -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -HarperDB supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. HarperDB does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -HarperDB supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -HarperDB supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -HarperDB allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.3/reference/sql-guide/json-search.md b/versioned_docs/version-4.3/reference/sql-guide/json-search.md deleted file mode 100644 index 7a0ad5b0..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -HarperDB automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, HarperDB offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. 
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.3/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.3/reference/sql-guide/reserved-word.md deleted file mode 100644 index 40019b59..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: HarperDB SQL Reserved Words ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# HarperDB SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/versioned_docs/version-4.3/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.3/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index c08aaf05..00000000 --- a/versioned_docs/version-4.3/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -HarperDB encourages developers to utilize other querying tools over SQL for performance purposes. HarperDB SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -HarperDB geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -2. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -3. Note if you are using Postman for you testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. 
| - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. 
- -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between HarperDB’s headquarters and the Washington Monument. 
- -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. 
- -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain HarperDB Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. 
| - -### Example - -Find HarperDB Headquarters within all locations within the database. - -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "HarperDB Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. 
Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "HarperDB Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.3/reference/storage-algorithm.md b/versioned_docs/version-4.3/reference/storage-algorithm.md deleted file mode 100644 index f91ce006..00000000 --- a/versioned_docs/version-4.3/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The HarperDB storage algorithm is fundamental to the HarperDB core functionality, enabling the [Dynamic Schema](./dynamic-schema) and all other user-facing functionality. HarperDB is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within HarperDB. - -## Query Language Agnostic - -The HarperDB storage algorithm was designed to abstract the data storage from any individual query language. HarperDB currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, HarperDB offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. 
Each HarperDB table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. HarperDB tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [HarperDB Dynamic Schema](./dynamic-schema) reflexively creates both the attribute and index reflexively as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -HarperDB inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## HarperDB Indexing Example (Single Table) - -![](/img/v4.3/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.3/reference/transactions.md b/versioned_docs/version-4.3/reference/transactions.md deleted file mode 100644 index 984b0a71..00000000 --- a/versioned_docs/version-4.3/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. 
HarperDB provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in HarperDB. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because HarperDB is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. 
However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. - -Transactions can also be explicitly started using the `transaction` global function that is provided in the HarperDB environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). 
- -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.4/administration/_category_.json b/versioned_docs/version-4.4/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.4/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.4/administration/administration.md b/versioned_docs/version-4.4/administration/administration.md deleted file mode 100644 index 9702927e..00000000 --- a/versioned_docs/version-4.4/administration/administration.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -Harper is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own Harper servers. 
- -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database Harper is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](./administration/logging/): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](./developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). - -### Horizontal Scaling with Node Cloning - -Harper provides rapid horizontal scaling capabilities through [node cloning functionality described here](cloning.md). 
- -### Monitoring - -Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](../reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](./developers/operations-api/utilities#system-information). -- Information about the current cluster configuration and status can be found in the [cluster APIs](./developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy visualize and monitor Harper with Graphana. diff --git a/versioned_docs/version-4.4/administration/cloning.md b/versioned_docs/version-4.4/administration/cloning.md deleted file mode 100644 index b3698092..00000000 --- a/versioned_docs/version-4.4/administration/cloning.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of Harper will create a clone of that -instance's config, databases and setup full replication. If it is run in a location where there is no existing Harper install, -it will, along with cloning, install Harper. If it is run in a location where there is another Harper instance, it will -only clone config, databases and replication that do not already exist. - -Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of Harper you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. 
- -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `REPLICATION_HOSTNAME` - _(optional)_ The clones replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--REPLICATION_HOSTNAME` - _(optional)_ The clones clustering host. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. If you want to run clone again set this value to `false`. If Harper is started with the clone variables -still present and `cloned` is true, Harper will just start as normal. - -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing `harperdb-config.yaml` file. 
- -More can be found in the Harper config documentation [here](../deployments/configuration). - -### Excluding database and components - -To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -``` - -_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -### Cloning system database - -Harper uses a database called `system` to store operational information. Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. 
- -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Replication - -If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. - -If any databases are excluded from the clone, replication will not be set up on these databases. - -### JWT Keys - -If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. - -### Cloning overtop of an existing Harper instance - -Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set Harper config before the clone is created. To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install Harper with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. - -### Cloning steps - -Clone node will execute the following steps when ran: - -1. Look for an existing Harper install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true` clone will skip the clone logic and start Harper. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install Harper. 
An instance is classed as a fresh clone if there is no system database. -1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone. -1. Clone is complete, start Harper. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. - -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e REPLICATION_HOSTNAME=1.123.45.7 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. diff --git a/versioned_docs/version-4.4/administration/compact.md b/versioned_docs/version-4.4/administration/compact.md deleted file mode 100644 index 1a71db14..00000000 --- a/versioned_docs/version-4.4/administration/compact.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Compact ---- - -# Compact - -Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. - -There are two options that Harper offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, Harper is not running when performing this operation. - -This will copy a Harper database with compaction. 
If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database. - -This command should be run in the [CLI](../deployments/harper-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete. Under the hood it loops through all non-system databases, creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database to where the original one is located and remove any backups. - -Compact on start is initiated by config in `harperdb-config.yaml` - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/versioned_docs/version-4.4/administration/harper-studio/create-account.md b/versioned_docs/version-4.4/administration/harper-studio/create-account.md deleted file mode 100644 index 2c8a43bc..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [Harper Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your Harper Cloud Instances. 
For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -2. Review the Privacy Policy and Terms of Service. -3. Click the sign up for free button. -4. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -5. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. diff --git a/versioned_docs/version-4.4/administration/harper-studio/enable-mixed-content.md b/versioned_docs/version-4.4/administration/harper-studio/enable-mixed-content.md deleted file mode 100644 index 67747d71..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the Harper Studio to Harper Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). 
diff --git a/versioned_docs/version-4.4/administration/harper-studio/index.md b/versioned_docs/version-4.4/administration/harper-studio/index.md deleted file mode 100644 index 75f4ccfb..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Harper Studio ---- - -# Harper Studio - -Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - -Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. It can be enabled in the [configuration file](../../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? - -While Harper Studio is web based and hosted by us, all database interactions are performed on the Harper instance the studio is connected to. The Harper Studio loads in your browser, at which point you login to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. All database interactions are made via the Harper Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you. 
diff --git a/versioned_docs/version-4.4/administration/harper-studio/instance-configuration.md b/versioned_docs/version-4.4/administration/harper-studio/instance-configuration.md deleted file mode 100644 index 394aa21c..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/instance-configuration.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click config in the instance control bar. - -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in Harper database user_ - -- Created Date (Harper Cloud only) - -- Region (Harper Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (Harper Cloud only) - -- Disk IOPS (Harper Cloud only) - -## Update Instance RAM - -Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. 
- -Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. 
- - - If you do have a credit card associated, you will be presented with the updated billing information. - - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -2. The instance will begin deleting immediately. - -## Restart Instance - -The Harper Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -2. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). 
diff --git a/versioned_docs/version-4.4/administration/harper-studio/instance-metrics.md b/versioned_docs/version-4.4/administration/harper-studio/instance-metrics.md deleted file mode 100644 index e9b48939..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The Harper Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance). - -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.4/administration/harper-studio/instances.md b/versioned_docs/version-4.4/administration/harper-studio/instances.md deleted file mode 100644 index 07da8097..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The Harper Studio allows you to administer all of your HarperDinstances in one place. Harper currently offers the following instance types: - -- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/). -- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any Harper installation that is managed by you. 
These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. Harper Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial Harper instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial Harper instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. 
Select Instance RAM - - _Harper Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ _More on instance specs\_\_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._ _More on IOPS Impact on Performance\_\_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. - -## Register Enterprise Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Instance Password - - _The password of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Host - - _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the Harper instance. 
Harper defaults to `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. 
- - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -Harper instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. - - _The username of a Harper user that is already configured in your Harper instance._ - -1. Enter the database password. - - _The password of a Harper user that is already configured in your Harper instance._ - -1. Click **Log In**. diff --git a/versioned_docs/version-4.4/administration/harper-studio/login-password-reset.md b/versioned_docs/version-4.4/administration/harper-studio/login-password-reset.md deleted file mode 100644 index 93f9a727..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your Harper Studio Account - -To log into your existing Harper Studio account: - -1. Navigate to the [Harper Studio](https://studio.harperdb.io/). -2. Enter your email address. -3. Enter your password. -4. Click **sign in**. 
- -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the Harper Studio password reset page. -2. Enter your email address. -3. Click **send password reset email**. -4. If the account exists, you will receive an email with a temporary password. -5. Navigate back to the Harper Studio login page. -6. Enter your email address. -7. Enter your temporary password. -8. Click **sign in**. -9. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -10. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the Harper Studio profile page. -2. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -3. Click the **Update Password** button. diff --git a/versioned_docs/version-4.4/administration/harper-studio/manage-applications.md b/versioned_docs/version-4.4/administration/harper-studio/manage-applications.md deleted file mode 100644 index 854b94b3..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/manage-applications.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. 
Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can edit individual files of your application directly in the Harper Studio. - -## Things to Keep in Mind - -To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation. - -When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the Harper logs, which can be found on the [status page](./instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. 
To revert the cached version in your editor to the version of the file stored on your Harper instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instance is set to by navigating to the [instance config page](./instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. - -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. 
Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost/application-template/getAll`. - -## Creating a New Application - -1. From the application page, click the "+ app" button at the top right. -1. Click "+ Create A New Application Using The Default Template". -1. Enter a name for your project, note project names must contain only alphanumeric characters, dashes and underscores. -1. Click OK. -1. Your project will be available in the applications file navigator on the left. Click a file to select a file to edit. - -## Editing an Application - -1. From the applications page, click the file you would like to edit from the file navigator on the left. -1. Edit the file with any changes you'd like. -1. Click "save" at the top right. Note, as mentioned above, when you save a file, the Harper Applications server will be restarted immediately. 
diff --git a/versioned_docs/version-4.4/administration/harper-studio/manage-databases-browse-data.md b/versioned_docs/version-4.4/administration/harper-studio/manage-databases-browse-data.md deleted file mode 100644 index c9b2844d..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/manage-databases-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Databases / Browse Data ---- - -# Manage Databases / Browse Data - -Manage instance databases/tables and browse data in tabular format with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage databases and tables, add new data, and more. - -## Manage Databases and Tables - -#### Create a Database - -1. Click the plus icon at the top right of the databases section. -2. Enter the database name. -3. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -2. Identify the appropriate database to delete and click the red minus sign in the same row. -3. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired database from the databases section. -2. Click the plus icon at the top right of the tables section. -3. Enter the table name. -4. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -5. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -2. 
Click the minus icon at the top right of the tables section. -3. Identify the appropriate table to delete and click the red minus sign in the same row. -4. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -2. This expands the search filters. -3. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -2. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 2. Click **Import From URL**. - 3. The CSV will load, and you will be redirected back to browse table data. -3. To upload a CSV file: - 1. Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 2. Navigate to your desired CSV file and select it. - 3. Click **Insert X Records**, where X is the number of records in your CSV. - 4. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -2. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -3. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -4. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -2. Modify the desired values. 
- - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -3. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -2. Click the **delete icon**. -3. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. - -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.4/administration/harper-studio/manage-instance-roles.md b/versioned_docs/version-4.4/administration/harper-studio/manage-instance-roles.md deleted file mode 100644 index 5a1e36e2..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: - -1. 
Navigate to the Harper Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -2. Enter the role name. - -3. Click the green check mark. - -4. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -5. Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -6. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -2. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -3. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be removed if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -2. 
Identify the appropriate role to delete and click the red minus sign in the same row. - -3. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.4/administration/harper-studio/manage-instance-users.md b/versioned_docs/version-4.4/administration/harper-studio/manage-instance-users.md deleted file mode 100644 index e125464a..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -Harper instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -2. Click **Add User**. - -## Edit a User - -Harper instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -2. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 2. Click **Update Password**. - -3. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 2. 
Click **Update Role**. - -4. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 2. Click **Delete User**. diff --git a/versioned_docs/version-4.4/administration/harper-studio/manage-replication.md b/versioned_docs/version-4.4/administration/harper-studio/manage-replication.md deleted file mode 100644 index 22c99510..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../reference/clustering/) first to gain a strong understanding of Harper clustering behavior. - -All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -2. Enter Cluster Password. -3. 
Review and/or Set Cluster Node Name. -4. Click **Enable Clustering**. - -At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you are presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. - -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -Harper Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -2. Identify the instance you would like to connect from the **unconnected instances** panel. - -3. Click the plus icon next to the appropriate instance. - -4. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -Harper Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -2. Click the minus icon next to the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. 
Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing a channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -2. For publish, click the toggle switch in the **publish** column. - -3. For subscribe, click the toggle switch in the **subscribe** column. diff --git a/versioned_docs/version-4.4/administration/harper-studio/organizations.md b/versioned_docs/version-4.4/administration/harper-studio/organizations.md deleted file mode 100644 index faae220e..00000000 --- a/versioned_docs/version-4.4/administration/harper-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique Harper Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. 
Click the **Create a New Organization** card. -3. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -4. Click Create Organization. - -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the Harper Studio Organizations page. -2. Identify the proper organization card and click the trash can icon. -3. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -4. Click the **Do It** button. - -## Manage Users - -Harper Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. In the **add user** box, enter the new user’s email address. -5. Click **Add User**. - -Users may or may not already be Harper Studio users when adding them to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio. 
- ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled as follows: - -1. Navigate to the Harper Studio Organizations page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Toggle the **Is Owner** switch to the desired status. - ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account; it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -6. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. 
- -_Harper billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -2. Click **Add Coupon**. -3. The coupon will then be available and displayed in the coupons panel. diff --git a/versioned_docs/version-4.4/administration/jobs.md b/versioned_docs/version-4.4/administration/jobs.md deleted file mode 100644 index 84859ffd..00000000 --- a/versioned_docs/version-4.4/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -Harper Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. 
- -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/utilities#delete-records-before) - -[export_local](../developers/operations-api/utilities#export-local) - -[export_to_s3](../developers/operations-api/utilities#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. - -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. - -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. 
- -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.4/administration/logging/audit-logging.md b/versioned_docs/version-4.4/administration/logging/audit-logging.md deleted file mode 100644 index 209b4981..00000000 --- a/versioned_docs/version-4.4/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging - -### Audit log - -The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart Harper for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [Harper API documentation](../../developers/operations-api/logs). 
- -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. One thing of note is that the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. 
It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.4/administration/logging/index.md b/versioned_docs/version-4.4/administration/logging/index.md deleted file mode 100644 index bde1870a..00000000 --- a/versioned_docs/version-4.4/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -Harper provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. -- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records. diff --git a/versioned_docs/version-4.4/administration/logging/standard-logging.md b/versioned_docs/version-4.4/administration/logging/standard-logging.md deleted file mode 100644 index 044c2260..00000000 --- a/versioned_docs/version-4.4/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the Harper application directory most likely located in your npm directory `npm/harperdb/logs`. 
- -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` -<timestamp> [<level>] [<thread/ID>] ...[<tag>]: <message> -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default; to do this, the default log level is `error`. If you require more information from the logs, lowering the log level (toward `trace`) will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. 
The Hub server is responsible for establishing the mesh network that connects instances of Harper and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. 
- -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.4/administration/logging/transaction-logging.md b/versioned_docs/version-4.4/administration/logging/transaction-logging.md deleted file mode 100644 index 99222e42..00000000 --- a/versioned_docs/version-4.4/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. - -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering/). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. 
- -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.4/deployments/_category_.json b/versioned_docs/version-4.4/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.4/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.4/deployments/configuration.md b/versioned_docs/version-4.4/deployments/configuration.md deleted file mode 100644 index 6be81165..00000000 --- a/versioned_docs/version-4.4/deployments/configuration.md +++ /dev/null @@ -1,1073 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase: `operationsApi`. - -To change a configuration value edit the `harperdb-config.yaml` file and save any changes. Harper must be restarted for changes to take effect. - -Alternately, configuration can be changed via environment and/or command line variables or via the API. 
To access lower level elements, use underscores to append parent/child elements (when used this way elements are case insensitive): - -``` -- Environment variables: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variables: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Calling `set_configuration` through the API: `operationsApi_network_port: 9925` -``` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install Harper overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. 
Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization' - -A string representation of a comma separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests. - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. 
- -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mlts` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. 
When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -```yaml -http: - mtls: true -``` - -or - -```yaml -http: - mtls: - required: true - user: user-name -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. 
`debug.host` - Specify the host interface to listen on `debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - ---- - -### `replication` - -The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances. - -```yaml -replication: - hostname: server-one - url: wss://server-one:9925 - databases: '*' - routes: - - wss://server-two:9925 - port: null - securePort: 9925, - enableRootCAs: true -``` - -`hostname` - _Type_: string; - -The hostname of the current Harper instance. - -`url` - _Type_: string; - -The URL of the current Harper instance. - -`databases` - _Type_: string/array; _Default_: "\*" (all databases) - -Configure which databases to replicate. This can be a string for all database or an array for specific databases. - -```yaml -replication: - databases: - - db1 - - db2 -``` - -`routes` - _Type_: array; - -An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties. - -`startTime` - _Type_: string; ISO formatted UTC date string. - -Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property. - -```yaml -replication: - copyTablesToCatchUp: true - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 - startTime: 2024-02-06T15:30:00Z -``` - -`port` - _Type_: integer; _Default_: 9925 (the operations API port `operationsApi.port`) - -The port to use for replication connections. 
- -`securePort` - _Type_: integer; - -The port to use for secure replication connections. - -`enableRootCAs` - _Type_: boolean; _Default_: true - -When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time. - -`copyTablesToCatchUp` - _Type_: boolean; _Default_: true - -Replication will first attempt to catch up using the audit log. If unsuccessful, it will perform a full table copy. When set to `false`, replication will only use the audit log. - ---- - -### `clustering` using NATS - -The `clustering` section configures the NATS clustering engine, this is used to replicate data between instances of Harper. - -_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over Websockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._ - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the Harper mesh network and discovery service. 
- -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. 
- -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. 
- -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of Harper threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. 
When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this is to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special Harper user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enabled the local studio or not. 
- -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Logging of application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root`). - -In addition, structured logging of data changes are also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enabled table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: warn - -Control the verbosity of text event logs. - -```yaml -logging: - level: warn -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`. - -`console` - _Type_: boolean; _Default_: true - -Controls whether console.log and other console.\* calls (as well as another JS components that writes to `process.stdout` and `process.stderr`) are logged to the log file. By default, these are logged to the log file, but this can be disabled. 
- -```yaml -logging: - console: true -``` - -`root` - _Type_: string; _Default_: \/log - -The path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: false - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log Harper logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - -`auditAuthEvents` - -`logFailed` - _Type_: boolean; _Default_: false - -Log all failed authentication events. - -_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -`logSuccessful` - _Type_: boolean; _Default_: false - -Log all successful authentication events. 
- -_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -```yaml -logging: - auditAuthEvents: - logFailed: false - logSuccessful: false -``` - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in Harper. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). 
- -### `operationsApi` - -The `operationsApi` section configures the Harper Operations API.\ -All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the Harper operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -`tls` - -This configures the Transport Layer Security for HTTPS support. 
- -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. 
This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. - -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: number; _Default_: null - -Path to a compression dictionary file - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. - -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting Harper, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). 
- -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/schema` - -The `path` configuration sets where all database files should reside. - -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. - -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. 
Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. - -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. - -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). 
`mtls` - _Type_: boolean | object; _Default_: false
- -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. 
- -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../developers/components/managing#adding-components-to-root) package. This could be a remote git repo, a local folder/file or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/versioned_docs/version-4.4/deployments/harper-cli.md b/versioned_docs/version-4.4/deployments/harper-cli.md deleted file mode 100644 index e559df01..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cli.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Harper CLI ---- - -# Harper CLI - -## Harper CLI - -The Harper command line interface (CLI) is used to administer [self-installed Harper instances](./install-harper/). 
- -### Installing Harper - -To install Harper with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. - -**Environment Variables** - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -**Command Line Arguments** - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -### Starting Harper - -To start Harper after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -### Stopping Harper - -To stop Harper once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -### Restarting Harper - -To restart Harper once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -### Getting the Harper Version - -To check the version of Harper that is installed run the following command: - -```bash -harperdb version -``` - ---- - -### Renew self-signed certificates - -To renew the Harper generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -### Copy a database with compaction - -To copy a Harper database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - ---- - -### Get 
all available CLI commands - -To display all available Harper CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -### Get the status of Harper and clustering - -To display the status of the Harper process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - ---- - -### Backups - -Harper uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down) , and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -## Operations API through the CLI - -Some of the API operations are available through the CLI, this includes most operations that do not require nested parameters. To call the operation use the following convention: ` =`. By default, the result will be formatted as YAML, if you would like the result in JSON pass: `json=true`. 
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` - -### Remote Operations - -The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example: - -```bash -export CLI_TARGET_USERNAME=HDB_ADMIN -export CLI_TARGET_PASSWORD=password -harperdb describe_database database=dev target=https://server.com:9925 -``` - -The same set of operations API are available for remote operations as well. - -#### Remote Component Deployment - -When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. 
This will package the current directory and send it to the target server (also `deploy` is allowed as an alias to `deploy_component`): - -```bash -harperdb deploy target=https://server.com:9925 -``` - -If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter): - -```bash -harperdb restart target=https://server.com:9925 replicated=true -``` diff --git a/versioned_docs/version-4.4/deployments/harper-cloud/alarms.md b/versioned_docs/version-4.4/deployments/harper-cloud/alarms.md deleted file mode 100644 index 372807e5..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | --------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.4/deployments/harper-cloud/index.md b/versioned_docs/version-4.4/deployments/harper-cloud/index.md deleted file mode 100644 index c0785d0d..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Harper Cloud ---- - -# Harper Cloud - -[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper, it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new Harper Cloud instance in the Harper Studio. diff --git a/versioned_docs/version-4.4/deployments/harper-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.4/deployments/harper-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 6ea4c7d2..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.4/deployments/harper-cloud/iops-impact.md b/versioned_docs/version-4.4/deployments/harper-cloud/iops-impact.md deleted file mode 100644 index 18e9f948..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## Harper Cloud Storage - -Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all Harper Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for Harper Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. 
So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact Harper Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. 
Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to Harper’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.4/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.4/deployments/harper-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index b6104f7c..00000000 --- a/versioned_docs/version-4.4/deployments/harper-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your Harper instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -Harper on Verizon 5G Wavelength brings Harper closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from Harper to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -Harper 5G Wavelength Instance Specs While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## Harper 5G Wavelength Storage - -Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](./iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.4/deployments/install-harper/index.md b/versioned_docs/version-4.4/deployments/install-harper/index.md deleted file mode 100644 index 00eff136..00000000 --- a/versioned_docs/version-4.4/deployments/install-harper/index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Install Harper ---- - -# Install Harper - -## Install Harper - -This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can try a [managed instance with Harper Cloud](https://studio.harperdb.io/sign-up). Harper is a cross-platform database; we recommend Linux for production use, but Harper can run on Windows and Mac as well, for development purposes. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. - -Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you have installed, you can skip to installing Harper, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always use the latest minor/patch for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start Harper - -Then you can install Harper with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -Harper will automatically start after installation. 
Harper's installation can be configured with numerous options via CLI arguments, for more information visit the [Harper Command Line Interface](./harper-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux). - -## With Docker - -If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container. - -## Offline Install - -If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -## Installation on Less Common Platforms - -Harper comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.4/deployments/install-harper/linux.md b/versioned_docs/version-4.4/deployments/install-harper/linux.md deleted file mode 100644 index cc312bac..00000000 --- a/versioned_docs/version-4.4/deployments/install-harper/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to Harper with sudo privileges exists -1. An additional volume for storing Harper files is attached to the Linux instance -1. Traffic to ports 9925 (Harper Operations API) 9926 (Harper Application Interface) and 9932 (Harper Clustering) is permitted - -While you will need to access Harper through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. 
- ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start Harper - -Here is an example of installing Harper with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing Harper with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start Harper on Boot - -Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=Harper - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration). 
diff --git a/versioned_docs/version-4.4/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.4/deployments/upgrade-hdb-instance.md deleted file mode 100644 index 768b9323..00000000 --- a/versioned_docs/version-4.4/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Upgrade a Harper Instance ---- - -# Upgrade a Harper Instance - -This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](./harper-cloud/) will be upgraded by the Harper Cloud team. - -## Upgrading - -Upgrading Harper is a two-step process. First the latest version of Harper must be downloaded from npm, then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of Harper using `npm install -g harperdb`. - - Note `-g` should only be used if you installed Harper globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - Harper will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -Harper supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that Harper is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure Harper is not running: - -```bash -harperdb stop -``` - -Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install Harper globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start Harper - -```bash -harperdb start -``` - ---- - -## Upgrading Nats to Plexus 4.4 - -To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process. - -The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa. 
- -### Enabling Plexus - -To enable Plexus on a node that is already running NATS, you will need to update [two values](./configuration) in the `harperdb-config.yaml` file: - -```yaml -replication: - url: wss://my-cluster-node-1:9925 - hostname: node-1 -``` - -`replication.url` – This should be set to the URL of the current Harper instance. - -`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance. - -### Upgrade Steps - -1. Set up the bridge node: - - Choose one node to be the bridge node. - - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.** - - Stop the instance and perform the upgrade. - - Start the instance. This node should now be running both Plexus and NATS. -1. Upgrade a node: - - Choose a node that needs upgrading and enable Plexus by following the "Enable Plexus" steps. - - Disable NATS by setting `clustering.enabled` to `false`. - - Stop the instance and upgrade it. - - Start the instance. - - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node. _Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._ - -```json -{ - "operation": "add_node", - "hostname": "node-1", - "url": "wss://my-cluster-node-1:9925" -} -``` - -1. Repeat Step 2 on all remaining nodes that need to be upgraded. -1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance. - -Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes. 
diff --git a/versioned_docs/version-4.4/developers/_category_.json b/versioned_docs/version-4.4/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.4/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.4/developers/applications/caching.md b/versioned_docs/version-4.4/developers/applications/caching.md deleted file mode 100644 index 4493111f..00000000 --- a/versioned_docs/version-4.4/developers/applications/caching.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: Caching ---- - -# Caching - -Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). 
You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](./defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). -- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). 
- -## Define External Data Source - -Next, you need to define the source for your cache. External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyTable } = tables; -MyTable.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. 
- -Cache tables with an expiration are periodically pruned for expired entries. Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". 
When a cached record expires, this means that a request for that record will trigger a request to the data source again. This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d+)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. 
This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). - -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - setInterval(() => { // every second retrieve more data - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - }, 1000); - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](./caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. 
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - async get() { - const post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json(); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. 
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache. - - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this is overriden, the method will be called at this time. 
- -### Caching Flow with Write-Through - -When writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- Harper will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written to the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/versioned_docs/version-4.4/developers/applications/debugging.md b/versioned_docs/version-4.4/developers/applications/debugging.md deleted file mode 100644 index bd9d2622..00000000 --- a/versioned_docs/version-4.4/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. 
Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. - -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration). - -Harper Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. 
- -## Viewing the Log - -The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page. Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log. diff --git a/versioned_docs/version-4.4/developers/applications/define-routes.md b/versioned_docs/version-4.4/developers/applications/define-routes.md deleted file mode 100644 index c442f9f1..00000000 --- a/versioned_docs/version-4.4/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. - -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. 
- -In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. 
- -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](./define-routes#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](./req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. 
For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.4/developers/applications/defining-roles.md b/versioned_docs/version-4.4/developers/applications/defining-roles.md deleted file mode 100644 index 365aa132..00000000 --- a/versioned_docs/version-4.4/developers/applications/defining-roles.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Defining Application Roles ---- - -# Defining Application Roles - -Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase. - -Let’s walk through creating a role, assigning it, and seeing it in action. - -## Step 1: Declare a Role - -First, point Harper to a roles configuration file. Add this to your `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -Then create a simple `roles.yaml` in your application directory. 
For example, here’s a role that can only read and insert data into the `Dog` table: - -```yaml -dog_reader: - super_user: false - data: - Dog: - read: true - insert: true -``` - -When Harper starts up, it will create this role (or update it if it already exists). - -## Step 2: Create a User for the Role - -Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run): - -```bash -curl -u admin:password -X POST http://localhost:9926 \ - -H "Content-Type: application/json" \ - -d '{ - "operation": "add_user", - "username": "alice", - "password": "password", - "role": "dog_reader" - }' -``` - -Now you have a user named `alice` with the `dog_reader` role. - -## Step 3: Make Requests as Different Users - -Authenticate requests as `alice` to see how her role works: - -```bash -# allowed (insert, role permits insert) -curl -u alice:password -X POST http://localhost:9926/Dog/ \ - -H "Content-Type: application/json" \ - -d '{"name": "Buddy", "breed": "Husky"}' - -# not allowed (delete, role does not permit delete) -curl -u alice:password -X DELETE http://localhost:9926/Dog/1 -``` - -The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`. - -Now compare with a super_user: - -```bash -# super_user can delete -curl -u admin:password -X DELETE http://localhost:9926/Dog/1 -``` - -This succeeds because the super_user role has full permissions. - -## Where to Go Next - -This page gave you the basics - declare a role, assign it, and see it work. - -For more advanced scenarios, including: - -- defining multiple databases per role, -- granting fine-grained attribute-level permissions, -- and the complete structure of `roles.yaml`, - -see the [Roles Reference](../../reference/roles). 
diff --git a/versioned_docs/version-4.4/developers/applications/defining-schemas.md b/versioned_docs/version-4.4/developers/applications/defining-schemas.md deleted file mode 100644 index 5268fb02..00000000 --- a/versioned_docs/version-4.4/developers/applications/defining-schemas.md +++ /dev/null @@ -1,219 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in Harper using GraphQL schema definitions. Schema definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. The [introduction to applications](./) provides a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allows data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties). For example, some Dog records could also optionally include a `favoriteTrick` property. 
- -In this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. -- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoints, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key. 
Using the `@relationship` directive will define a property as a computed property, which resolves to the an record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship. Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resource) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... 
-} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define on a reciprocal relationship, from the example above, adding a relationship from brand back to product. Here we use continue to use the `brandId` attribute from the `Product` schema, and we define a relationship to the `Product` table through the `products` attribute: - -```graphql -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: brandId) -} -``` - -Once this is defined we can use the `products` attribute as a property in our brand instances and allow for querying by `products` and selecting product attributes as returned properties in query results. - -Note that schemas can also reference themselves with relationships, allowing records to define relationships like parent-child relationships between records in the same table. Also note, that for a many-to-many relationship, you must not combine the `to` and `from` property in the same relationship directive. - -### Computed Properties: `@computed` - -The `@computed` directive specifies that a field is computed based on other fields in the record. This is useful for creating derived fields that are not stored in the database, but are computed when specific record fields is queried/accessed. The `@computed` directive must be used in combination with a field that is a function that computes the value of the field. 
For example: - -```graphql -type Product @table { - id: ID @primaryKey - price: Float - taxRate: Float - totalPrice: Float @computed(from: "price + (price * taxRate)") -} -``` - -The `from` argument specifies the expression that computes the value of the field. The expression can reference other fields in the record. The expression is evaluated when the record is queried or indexed. - -The `computed` directive may also be defined in a JavaScript module, which is useful for more complex computations. You can specify a computed attribute, and then define the function with the `setComputedAttribute` method. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed -} -``` - -```javascript -tables.Product.setComputedAttribute('totalPrice', (record) => { - return record.price + record.price * record.taxRate; -}); -``` - -Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed(version: 1) @indexed -} -``` - -If you were to update the `setComputedAttribute` function for the `totalPrice` attribute, to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. 
The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing. - -Note that computed properties will not be included by default in a query result, you must explicitly include them in query results using the `select` query function. - -Another example of using a computed custom index, is that we could index all the comma-separated words in a `tags` property by doing (similar techniques are used for full-text indexing): - -```graphql -type Product @table { - id: ID @primaryKey - tags: String # comma delimited set of tags - tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags -} -``` - -For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32-bit, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, that you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys). - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. 
This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). - -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself. - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity. - -### Field Types - -Harper supports the following field types in addition to user defined (object) types: - -- `String`: String/text. -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647). -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992). -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available). -- `BigInt`: Any integer (negative or positive) with less than 300 digits. (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately.) -- `Boolean`: true or false. -- `ID`: A string (but indicates it is not intended to be human readable). 
-- `Any`: Any primitive, object, or array is allowed. -- `Date`: A Date object. -- `Bytes`: Binary data (as a Buffer or Uint8Array). - -#### Renaming Tables - -It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. - -### OpenAPI Specification - -_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints are configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`. - -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/versioned_docs/version-4.4/developers/applications/index.md b/versioned_docs/version-4.4/developers/applications/index.md deleted file mode 100644 index 29094048..00000000 --- a/versioned_docs/version-4.4/developers/applications/index.md +++ /dev/null @@ -1,378 +0,0 @@ ---- -title: Applications ---- - -# Applications - -## Overview of Harper Applications - -Harper is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that Harper provides by building a Harper component, a fundamental building-block of the Harper ecosystem. - -When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference. 
- -## Understanding the Component Application Architecture - -Harper provides several types of components. Any package that is added to Harper is called a "component", and components are generally categorized as either "applications", which deliver a set of endpoints for users, or "extensions", which are building blocks for features like authentication, additional protocols, and connectors that can be used by other components. Components can be added to the `hdb/components` directory and will be loaded by Harper when it starts. Components that are remotely deployed to Harper (through the studio or the operation API) are installed into the `hdb/node_modules` directory. Using `harperdb run .` or `harperdb dev .` allows us to specifically load a certain application in addition to any that have been manually added to `hdb/components` or installed (in `hdb/node_modules`). - -```mermaid -flowchart LR - Client(Client)-->Endpoints - Client(Client)-->HTTP - Client(Client)-->Extensions - subgraph Harper - direction TB - Applications(Applications)-- "Schemas" --> Tables[(Tables)] - Applications-->Endpoints[/Custom Endpoints/] - Applications-->Extensions - Endpoints-->Tables - HTTP[/REST/HTTP/]-->Tables - Extensions[/Extensions/]-->Tables - end -``` - -## Getting up and Running - -### Pre-Requisites - -We assume you are running Harper version 4.2 or greater, which supports Harper Application architecture (in previous versions, this is 'custom functions'). - -### Scaffolding our Application Directory - -Let's create and initialize a new directory for our application. It is recommended that you start by using the [Harper application template](https://github.com/HarperDB/application-template). Assuming you have `git` installed, you can create your project directory by cloning: - -```shell -> git clone https://github.com/HarperDB/application-template my-app -> cd my-app -``` - -
- -You can also start with an empty application directory if you'd prefer. - -To create your own application from scratch, you'll may want to initialize it as an npm package with the \`type\` field set to \`module\` in the \`package.json\` so that you can use the EcmaScript module syntax used in this tutorial: - -```shell -> mkdir my-app -> cd my-app -> npm init -y esnext -``` - -
- -
- -If you want to version control your application code, you can adjust the remote URL to your repository. - -Here's an example for a github repo: - -```shell -> git remote set-url origin git@github.com:// -``` - -Locally developing your application and then committing your app to a source control is a great way to manage your code and configuration, and then you can [directly deploy from your repository](#deploying-your-application). - -
- -## Creating our first Table - -The core of a Harper application is the database, so let's create a database table! - -A quick and expressive way to define a table is through a [GraphQL Schema](https://graphql.org/learn/schema). Using your editor of choice, edit the file named `schema.graphql` in the root of the application directory, `my-app`, that we created above. To create a table, we will need to add a `type` of `@table` named `Dog` (and you can remove the example table in the template): - -```graphql -type Dog @table { - # properties will go here soon -} -``` - -And then we'll add a primary key named `id` of type `ID`: - -_(Note: A GraphQL schema is a fast method to define tables in Harper, but you are by no means required to use GraphQL to query your application, nor should you necessarily do so)_ - -```graphql -type Dog @table { - id: ID @primaryKey -} -``` - -Now we tell Harper to run this as an application: - -```shell -> harperdb dev . # tell Harper cli to run current directory as an application in dev mode -``` - -Harper will now create the `Dog` table and its `id` attribute we just defined. Not only is this an easy way to get create a table, but this schema is included in our application, which will ensure that this table exists wherever we deploy this application (to any Harper instance). - -## Adding Attributes to our Table - -Next, let's expand our `Dog` table by adding additional typed attributes for dog `name`, `breed` and `age`. - -```graphql -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} -``` - -This will ensure that new records must have these properties with these types. - -Because we ran `harperdb dev .` earlier (dev mode), Harper is now monitoring the contents of our application directory for changes and reloading when they occur. 
This means that once we save our schema file with these new attributes, Harper will automatically reload our application, read `my-app/schema.graphql` and update the `Dog` table and attributes we just defined. The dev mode will also ensure that any logging or errors are immediately displayed in the console (rather only in the log file). - -As a NoSQL database, Harper supports heterogeneous records (also referred to as documents), so you can freely specify additional properties on any record. If you do want to restrict the records to only defined properties, you can always do that by adding the `sealed` directive: - -```graphql -type Dog @table @sealed { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -If you are using Harper Studio, we can now add JSON-formatted records to this new table in the studio or upload data as CSV from a local file or URL. A third, more advanced, way to add data to your database is to use the [operations API](../../developers/operations-api/), which provides full administrative control over your new Harper instance and tables. - -## Adding an Endpoint - -Now that we have a running application with a database (with data if you imported any data), let's make this data accessible from a RESTful URL by adding an endpoint. To do this, we simply add the `@export` directive to our `Dog` table: - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String - breed: String - age: Int - tricks: [String] -} -``` - -By default the application HTTP server port is `9926` (this can be [configured here](../../deployments/configuration#http)), so the local URL would be [http://localhost:9926/Dog/](http://localhost:9926/Dog/) with a full REST API. We can PUT or POST data into this table using this new path, and then GET or DELETE from it as well (you can even view data directly from the browser). If you have not added any records yet, we could use a PUT or POST to add a record. 
PUT is appropriate if you know the id, and POST can be used to assign an id: - -```http -POST /Dog/ -Content-Type: application/json - -{ - "name": "Harper", - "breed": "Labrador", - "age": 3, - "tricks": ["sits"] -} -``` - -With this a record will be created and the auto-assigned id will be available through the `Location` header. If you added a record, you can visit the path `/Dog/` to view that record. Alternately, the curl command `curl http://localhost:9926/Dog/` will achieve the same thing. - -## Authenticating Endpoints - -These endpoints automatically support `Basic`, `Cookie`, and `JWT` authentication methods. See the documentation on [security](../../developers/security/) for more information on different levels of access. - -By default, Harper also automatically authorizes all requests from loopback IP addresses (from the same computer) as the superuser, to make it simple to interact for local development. If you want to test authentication/authorization, or enforce stricter security, you may want to disable the [`authentication.authorizeLocal` setting](../../deployments/configuration#authentication). - -### Content Negotiation - -These endpoints support various content types, including `JSON`, `CBOR`, `MessagePack` and `CSV`. Simply include an `Accept` header in your requests with the preferred content type. We recommend `CBOR` as a compact, efficient encoding with rich data types, but `JSON` is familiar and great for web application development, and `CSV` can be useful for exporting data to spreadsheets or other processing. 
- -Harper works with other important standard HTTP headers as well, and these endpoints are even capable of caching interaction: - -``` -Authorization: Basic -Accept: application/cbor -If-None-Match: "etag-id" # browsers can automatically provide this -``` - -## Querying - -Querying your application database is straightforward and easy, as tables exported with the `@export` directive are automatically exposed via [REST endpoints](./rest). Simple queries can be crafted through [URL query parameters](https://en.wikipedia.org/wiki/Query_string). - -In order to maintain reasonable query speed on a database as it grows in size, it is critical to select and establish the proper indexes. So, before we add the `@export` declaration to our `Dog` table and begin querying it, let's take a moment to target some table properties for indexing. We'll use `name` and `breed` as indexed table properties on our `Dog` table. All we need to do to accomplish this is tag these properties with the `@indexed` directive: - -```graphql -type Dog @table { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -And finally, we'll add the `@export` directive to expose the table as a RESTful endpoint - -```graphql -type Dog @table @export { - id: ID @primaryKey - name: String @indexed - breed: String @indexed - owner: String - age: Int - tricks: [String] -} -``` - -Now we can start querying. Again, we just simply access the endpoint with query parameters (basic GET requests), like: - -``` -http://localhost:9926/Dog/?name=Harper -http://localhost:9926/Dog/?breed=Labrador -http://localhost:9926/Dog/?breed=Husky&name=Balto&select=id,name,breed -``` - -Congratulations, you now have created a secure database application backend with a table, a well-defined structure, access controls, and a functional REST endpoint with query capabilities! 
See the [REST documentation for more information on HTTP access](../../developers/rest) and see the [Schema reference](applications/defining-schemas) for more options for defining schemas. - -> Additionally, you may now use GraphQL (over HTTP) to create queries. See the documentation for that new feature [here](../../reference/graphql). - -## Deploying your Application - -This guide assumes that you're building a Harper application locally. If you have a cloud instance available, you can deploy it by doing the following: - -- Commit and push your application component directory code (i.e., the `my-app` directory) to a Github repo. In this tutorial we started with a clone of the application-template. To commit and push to your own repository, change the origin to your repo: `git remote set-url origin git@github.com:your-account/your-repo.git` -- Go to the applications section of your target cloud instance in the Harper Studio. -- In the left-hand menu of the applications IDE, click 'deploy' and specify a package location reference that follows the [npm package specification](https://docs.npmjs.com/cli/v8/using-npm/package-spec) (i.e., a string like `HarperDB/Application-Template` or a URL like `https://github.com/HarperDB/application-template`, for example, that npm knows how to install). - -You can also deploy your application from your repository by directly using the [`deploy_component` operation](../../developers/operations-api/components#deploy-component). - -Once you have deployed your application to a Harper cloud instance, you can start scaling your application by adding additional instances in other regions. - -With the help of a global traffic manager/load balancer configured, you can distribute incoming requests to the appropriate server. You can deploy and re-deploy your application to all the nodes in your mesh. - -Now, with an application that you can deploy, update, and re-deploy, you have an application that is horizontally and globally scalable! 
- -## Custom Functionality with JavaScript - -So far we have built an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. In Harper, data is accessed through our [Resource API](../../reference/resources/), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). 
To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - get(query) { - this.humanAge = 15 + this.age * 5; // silly calculation of human age equivalent - return super.get(query); - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age`, will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. And changing or assigning new properties can be saved or included in the resource as it returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](../../developers/rest#select-properties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -And next we will use this table in our `get()` method. We will call the new table's (static) `get()` method to retrieve a breed by id. 
To do this correctly, we access the table using our current context by passing in `this` as the second argument. This is important because it ensures that we are accessing the data atomically, in a consistent snapshot across tables. This provides automatically tracking of most recently updated timestamps across resources for caching purposes. This allows for sharing of contextual metadata (like user who requested the data), and ensure transactional atomicity for any writes (not needed in this get operation, but important for other operations). The resource methods are automatically wrapped with a transaction (will commit/finish when the method completes), and this allows us to fully utilize multiple resources in our current transaction. With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -//resource.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - async get(query) { - let breedDescription = await Breed.get(this.breed, this); - this.breedDescription = breedDescription; - return super.get(query); - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. the POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. 
We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - async post(data) { - if (data.action === 'add-trick') this.tricks.push(data.trick); - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data on to the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being update. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post` method or `put` method to do this, but we may want to separate the logic so these methods can be called separately without authorization checks. The [Resource API](../../reference/resources/) defines `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete`, or to easily configure individual capabilities. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - allowUpdate(user) { - return this.owner === user.username; - } -} -``` - -Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. 
If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.json -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to url like / to be directly resolved to this resource. - -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get() { - return (await fetch(`https://best-dog-site.com/${this.getId()}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources. - -Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../../reference/globals) and the [Resource interface](../../reference/resources/). - -## Configuring Applications/Components - -Every application or component can define their own configuration in a `config.yaml`. 
If you are using the application template, you will have a [default configuration in this config file](https://github.com/HarperDB/application-template/blob/main/config.yaml) (which is default configuration if no config file is provided). Within the config file, you can configure how different files and resources are loaded and handled. The default configuration file itself is documented with directions. Each entry can specify any `files` that the loader will handle, and can also optionally specify what, if any, URL `path`s it will handle. A path of `/` means that the root URLs are handled by the loader, and a path of `.` indicates that the URLs that start with this application's name are handled. - -This config file allows you define a location for static files, as well (that are directly delivered as-is for incoming HTTP requests). - -Each configuration entry can have the following properties, in addition to properties that may be specific to the individual component: - -- `files`: This specifies the set of files that should be handled the component. This is a glob pattern, so a set of files can be specified like "directory/\*\*". -- `path`: This is the URL path that is handled by this component. -- `root`: This specifies the root directory for mapping file paths to the URLs. For example, if you want all the files in `web/**` to be available in the root URL path via the static handler, you could specify a root of `web`, to indicate that the web directory maps to the root URL path. -- `package`: This is used to specify that this component is a third party package, and can be loaded from the specified package reference (which can be an NPM package, Github reference, URL, etc.). - -## Define Fastify Routes - -Exporting resource will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. 
Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the Harper's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.4/developers/applications/web-applications.md b/versioned_docs/version-4.4/developers/applications/web-applications.md deleted file mode 100644 index 02fd1893..00000000 --- a/versioned_docs/version-4.4/developers/applications/web-applications.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Web Applications on Harper ---- - -# Web Applications on Harper - -Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed -specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used -with Harper to create web applications with standard best-practice design and development patterns. 
Running these frameworks -on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading, -caching, and distributed design. - -Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching -allows you to create full-featured web applications with a single platform. This eliminates the overhead of legacy solutions that -require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while -allowing the full stack to be deployed to distributed locations with full local response handling, providing an incredibly low latency web experience. - -## Web Application Frameworks - -With built-in caching mechanisms, and an easy-to-use JavaScript API for interacting with data, creating full-featured applications -using popular frameworks is a simple and straightforward process. - -Get started today with one of our examples: - -- [Next.js](https://github.com/HarperDB/nextjs-example) -- [React SSR](https://github.com/HarperDB/react-ssr-example) -- [Vue SSR](https://github.com/HarperDB/vue-ssr-example) -- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example) -- [Solid SSR](https://github.com/HarperDB/solid-ssr-example) - -## Cookie Support - -Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications -using best-practice security patterns, allowing users to login and maintain a session without any credential storage on the client side -that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. 
For example, this could be a login endpoint in your resources.js file: - -```javascript -export class Login extends Resource { - async post(data) { - const { username, password } = data; - await request.login(username, password); - return { message: 'Logged in!' }; - } -} -``` - -This endpoint can be called from the client side using a standard fetch request, a cookie will be returned, and the session will be maintained by Harper. -This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling. - -## Browser Caching Negotiation - -Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return `304 Not Modified` response based on prior `Etag` sent in headers. It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server. - -## Built-in Cross-Origin Resource Sharing (CORS) - -Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you. 
- -## More Resources - -Make sure to check out our developer videos too: - -- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY) -- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc) diff --git a/versioned_docs/version-4.4/developers/components/built-in.md b/versioned_docs/version-4.4/developers/components/built-in.md deleted file mode 100644 index ec5bc80a..00000000 --- a/versioned_docs/version-4.4/developers/components/built-in.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: Built-In Components ---- - -# Built-In Components - -Harper provides extended features using built-in components. They do **not** need to be installed with a package manager, and simply must be specified in a config to run. These are used throughout many Harper docs, guides, and examples. Unlike external components which have their own semantic versions, built-in components follow Harper's semantic version. - -- [fastifyRoutes](#fastifyroutes) - - [graphql](#graphql) - - [graphqlSchema](#graphqlschema) - - [jsResource](#jsresource) - - [rest](#rest) - - [roles](#roles) - - [static](#static) - -## fastifyRoutes - -Specify custom endpoints using [Fastify](https://fastify.dev/). - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. 
- -Complete documentation for this feature is available here: [Define Fastify Routes](../applications/define-routes) - -```yaml -fastifyRoutes: - files: './routes/*.js' -``` - -## graphql - -> GraphQL querying provides functionality for mapping GraphQL querying functionality to exported resources, and is based on the [GraphQL Over HTTP / GraphQL specifications](https://graphql.github.io/graphql-over-http/draft/#) (it is designed to intuitively map queries to Harper resources, but does not implement the full [specification](https://spec.graphql.org/) of resolvers, subscribers, and mutations). - -Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification. - -Complete documentation for this feature is available here: [GraphQL](../../reference/graphql) - -```yaml -graphql: true -``` - -## graphqlSchema - -Specify schemas for Harper tables and resources via GraphQL schema syntax. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Schemas](../applications/defining-schemas) - -```yaml -graphqlSchema: - files: './schemas.graphql' -``` - -## jsResource - -Specify custom, JavaScript based Harper resources. - -Refer to the Application [Custom Functionality with JavaScript](../applications/#custom-functionality-with-javascript) guide, or [Resource Class](../../reference/resource) reference documentation for more information on custom resources. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -```yaml -jsResource: - files: './resource.js' -``` - -## rest - -Enable automatic REST endpoint generation for exported resources with this component. 
- -Complete documentation for this feature is available here: [REST](../rest) - -```yaml -rest: true -``` - -This component contains additional options: - -To enable `Last-Modified` header support: - -```yaml -rest: - lastModified: true -``` - -To disable automatic WebSocket support: - -```yaml -rest: - webSocket: false -``` - -## roles - -Specify roles for Harper tables and resources. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Roles](../applications/defining-roles) - -```yaml -roles: - files: './roles.yaml' -``` - -## static - -Specify which files to serve statically from the Harper HTTP endpoint. Built using the [send](https://www.npmjs.com/package/send) and [serve-static](https://www.npmjs.com/package/serve-static) modules. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -```yaml -static: - files: './web/*' -``` diff --git a/versioned_docs/version-4.4/developers/components/index.md b/versioned_docs/version-4.4/developers/components/index.md deleted file mode 100644 index 9becbe7a..00000000 --- a/versioned_docs/version-4.4/developers/components/index.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Components ---- - -# Components - -Harper components are a core Harper concept defined as flexible JavaScript based _extensions_ of the highly extensible core Harper platform. They are executed by Harper directly and have complete access to the Harper [Global APIs](../../reference/globals) (such as `Resource`, `databases`, and `tables`). - -A key aspect of components is their extensibility; components can be built on other components. 
For example, a [Harper Application](../../developers/applications/) is a component that uses many other components. The [application template](https://github.com/HarperDB/application-template) demonstrates many of Harper's built-in components such as `rest` (for automatic REST endpoint generation), `graphqlSchema` (for table schema definitions), and many more. - -From management to development, the following pages document everything a developer needs to know about Harper components. - -- [Managing Components](components/managing) - developing, installing, deploying, and executing Harper components locally and remotely -- [Technical Reference](components/reference) - detailed, technical reference for component development -- [Built-In Components](components/built-in) - documentation for all of Harper's built-in components (i.e. `rest`) - -## Custom Components - -The following list is all of Harper's officially maintained, custom components. They are all available on npm and GitHub. - -- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) -- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) -- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) -- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) -- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) diff --git a/versioned_docs/version-4.4/developers/components/managing.md b/versioned_docs/version-4.4/developers/components/managing.md deleted file mode 100644 index 97402e39..00000000 --- a/versioned_docs/version-4.4/developers/components/managing.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Managing ---- - -# Managing - -Harper offers several approaches to managing components that differ between local development and Harper managed instances. This page will cover the recommended methods of developing, installing, deploying, and running Harper components. - -## Local Development - -Harper is designed to be simple to run locally. 
Generally, Harper should be installed locally on a machine using a global package manager install (i.e. `npm i -g harperdb`). - -> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/). - -When developing a component locally there are a number of ways to run it on Harper. - -### `dev` and `run` commands - -The quickest way to run a component is by using the `dev` command within the component directory. - -The `harperdb dev .` command will automatically watch for file changes within the component directory and restart the Harper threads when changes are detected. - -The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread. - -Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process. - -### Deploying to a local Harper instance - -Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead. - -1. Start up Harper with `harperdb` -1. _Deploy_ the component to the local instance by executing: - - ```sh - harperdb deploy_component \ - project= \ - package= \ - restart=true - ``` - - - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally - - The `package=` option creates a symlink to the component simplifying restarts - - By default, the `deploy_component` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly - - The `restart=true` option automatically restarts Harper threads after the component is deployed - - If set to `'rolling'`, a rolling restart will be triggered after the component is deployed - -1. 
In another terminal, use the `harperdb restart` command to restart the instance's threads at any time - - With `package=`, the component source is symlinked so changes will automatically be picked up between restarts - - If `package` was omitted, run the `deploy_component` command again with any new changes -1. To remove the component use `harperdb drop_component project=` - -Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the component deployed). Upon Harper startup, the component will automatically be loaded and executed across all threads. - -> Not all [component operations](../operations-api/components) are available via CLI. When in doubt, switch to using the Operations API via network requests to the local Harper instance. - -For example, to properly _deploy_ a `test-component` locally, the command would look like: - -```sh -harperdb deploy_component \ - project=test-component \ - package=/Users/dev/test-component \ - restart=true -``` - -> If the current directory is the component directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path. - -## Remote Management - -Managing components on a remote Harper instance is best accomplished through [component operations](../operations-api/components), similar to using the `deploy_component` command locally. Before continuing, always backup critical Harper instances. Managing, deploying, and executing components can directly impact a live system. - -Remote Harper instances work very similarly to local Harper instances. The primary component management operations still include `deploy_component`, `drop_component`, and `restart`. - -The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. 
Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments. - -All together: - -```sh -harperdb deploy_component \ - project= \ - package= \ - username= \ - password= \ - target= \ - restart=true \ - replicated=true -``` - -Or, using environment variables: - -```sh -export CLI_TARGET_USERNAME= -export CLI_TARGET_PASSWORD= -harperdb deploy_component \ - project= \ - package= \ - target= \ - restart=true \ - replicated=true -``` - -Unlike local development where `package` should be set to a local file path for symlinking and improved development experience purposes, now it has some additional options. - -A local component can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation. - -Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies). - -- For components deployed to npm, specify the package name: `package="@harperdb/status-check"` -- For components on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check` -- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-component.git"` - - Reference the [SSH Key](../operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance -- Even tarball URLs are supported: `package="https://example.com/component.tar.gz"` - -> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers. 
- -These `package` values are all supported because behind-the-scenes, Harper is generating a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. This is why symlinks are generated when specifying a file path locally. The following [Advanced](./managing#advanced) section explores this pattern in more detail. - -Finally, don't forget to include `restart=true`, or run `harperdb restart target=`. - -## Advanced - -The following methods are advanced and should be executed with caution as they can have unintended side-effects. Always backup any critical Harper instances before continuing. - -First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field. - -> For a useful shortcut on POSIX compliant machines run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"/g'` - -This path is the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path will be `/components` by default (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path. - -### Adding components to root - -Similar to how components can specify other components within their `config.yaml`, components can be added to Harper by adding them to the `harperdb-config.yaml`. - -The configuration is very similar to that of `config.yaml`. Entries are comprised of a top-level `:`, and an indented `package: ` field. Any additional component options can also be included as indented fields. - -```yaml -status-check: - package: '@harperdb/status-check' -``` - -The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. 
When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. Thus, the `package: ` can be any valid dependency syntax such as npm packages, GitHub repos, tarballs, and local directories are all supported. - -Given a root config like: - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from npm -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -Harper will generate a `package.json` like: - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -npm will install all the components and store them in ``. A symlink back to `/node_modules` is also created for dependency resolution purposes. - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -By specifying a file path, npm will generate a symlink and then changes will be automatically picked up between restarts. diff --git a/versioned_docs/version-4.4/developers/components/reference.md b/versioned_docs/version-4.4/developers/components/reference.md deleted file mode 100644 index 525ffa6a..00000000 --- a/versioned_docs/version-4.4/developers/components/reference.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: Component Reference ---- - -# Component Reference - -The technical definition of a Harper component is fairly loose. 
In the absolute, simplest form, a component is any JavaScript module that is compatible with the [default component configuration](#default-component-configuration). For example, a module with a singular `resources.js` file is technically a valid component. - -Harper provides many features as _built-in components_; these can be used directly without installing any other dependencies. - -Other features are provided by _custom components_. These can be npm packages such as [@harperdb/nextjs](https://github.com/HarperDB/nextjs) and [@harperdb/apollo](https://github.com/HarperDB/apollo) (which are maintained by Harper), or something maintained by the community. Custom components follow the same configuration rules and use the same APIs that Harper's built-in components do. The only difference is that they must be a part of the component's dependencies. - -> Documentation is available for all [built-in](./built-in) and [custom](./#custom-components) Harper components. - - - -## Component Configuration - -Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how a component configures other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it. - -```yaml -name: - option-1: value - option-2: value -``` - -It is the entry's `name` that is used for component resolution. It can be one of the [built-in components](./built-in), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples. - -For some built-in components they can be configured with as little as a top-level boolean; for example, the [rest](./built-in#rest) extension can be enabled with just: - -```yaml -rest: true -``` - -Other components (built-in or custom), will generally have more configuration options. 
Some options are ubiquitous to the Harper platform, such as the `files`, `path`, and `root` options for a [Resource Extension](#resource-extension-configuration), or `package` for a [custom component](#custom-component-configuration). Additionally, [custom options](#protocol-extension-configuration) can be defined for [Protocol Extensions](#protocol-extension). - -### Custom Component Configuration - -Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`: - -```json -{ - "dependencies": { - "@harperdb/nextjs": "^1.0.0" - } -} -``` - -Then, within `config.yaml` it can be enabled and configured using: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - # ... -``` - -Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`: - -```json -{ - "dependencies": { - "harper-nextjs-test-feature": "HarperDB/nextjs#test-feature" - } -} -``` - -And now in `config.yaml`: - -```yaml -harper-nextjs-test-feature: - package: '@harperdb/nextjs' - files: '/*' - # ... -``` - -### Default Component Configuration - -Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components. - -```yaml -rest: true -graphql: true -graphqlSchema: - files: '*.graphql' -roles: - files: 'roles.yaml' -jsResource: - files: 'resources.js' -fastifyRoutes: - files: 'routes/*.js' - path: '.' -static: - files: 'web/**' -``` - -Refer to the [built-in components](./built-in) documentation for more information on these fields. - -If a `config.yaml` is defined, it will **not** be merged with the default config. 
- -## Extensions - -A Harper Extension is an extensible component that is intended to be used by other components. The built-in components [graphqlSchema](./built-in#graphqlschema) and [jsResource](./built-in#jsresource) are both examples of extensions. - -There are two key types of Harper Extensions: **Resource Extension** and **Protocol Extensions**. The key difference is that a **Protocol Extension** can return a **Resource Extension**. - -Functionally, what makes an extension a component is the contents of `config.yaml`. Unlike the Application Template referenced earlier, which specified multiple components within the `config.yaml`, an extension will specify an `extensionModule` option. - -- `extensionModule` - `string` - _required_ - A path to the extension module source code. The path must resolve from the root of the extension module directory. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`. - -If the extension is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `extensionModule: ./dist/index.js`) - -It is also recommended that all extensions have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since extensions are just JavaScript packages, they can do anything a JavaScript package can normally do. It can be written in TypeScript, and compiled to JavaScript. It can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). It can be published to npm. The possibilities are endless! - -Furthermore, what defines an extension separately from a component is that it leverages any of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs. The key is in the name, **extensions are extensible**. 
- -### Resource Extension - -A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in#jsresource) extension handles executing JavaScript files. - -Resource Extensions are comprised of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-path-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-path-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-path-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-path-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence. - -> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shutdown and turned back on. - -Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and `handleDirectory()` and `setupDirectory()` methods have identical function definitions (arguments and return value behavior). - -#### Resource Extension Configuration - -Any [Resource Extension](#resource-extension) can be configured with the `files`, `path`, and `root` options. These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods. - -- `files` - `string` - _required_ - Specifies the set of files and directories that should be handled by the component. Can be a glob pattern. 
-- `path` - `string` - _optional_ - Specifies the URL path to be handled by the component. -- `root` - `string` - _optional_ - Specifies the root directory for mapping file paths to the URLs. - -For example, to configure the [static](./built-in#static) component to server all files from `web` to the root URL path: - -```yaml -static: - files: 'web/**' - root: 'web' -``` - -Or, to configure the [graphqlSchema](./built-in#graphqlschema) component to load all schemas within the `src/schema` directory: - -```yaml -graphqlSchema: - files: 'src/schema/*.schema' -``` - -#### Resource Extension API - -In order for an extension to be classified as a Resource Extension it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. For example: - -```js -// ESM -export function handleFile() {} -export function setupDirectory() {} - -// or CJS -function handleDirectory() {} -function setupFile() {} - -module.exports = { handleDirectory, setupFile }; -``` - -When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead: - -```js -export function start() { - return { - handleFile() {}, - }; -} -``` - -##### `handleFile(contents, urlPath, path, resources): void | Promise` - -##### `setupFile(contents, urlPath, path, resources): void | Promise` - -These methods are for processing individual files. They can be async. - -> Remember! -> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. 
- -Parameters: - -- `contents` - `Buffer` - The contents of the file -- `urlPath` - `string` - The recommended URL path of the file -- `path` - `string` - The relative path of the file - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `void | Promise` - -##### `handleDirectory(urlPath, path, resources): boolean | void | Promise` - -##### `setupDirectory(urlPath, path, resources): boolean | void | Promise` - -These methods are for processing directories. They can be async. - -If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed. - -> Remember! -> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `urlPath` - `string` - The recommended URL path of the file -- `path` - `string` - The relative path of the directory - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `boolean | void | Promise` - -### Protocol Extension - -A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../../reference/globals#server) global API documentation for more information). - -#### Protocol Extension Configuration - -In addition to the `files`, `path`, and `root` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. 
Any options added to the extension configuration (in `config.yaml`), will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple option that can be included in its configuration. For example, a Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - files: '/*' - prebuilt: true - dev: false -``` - -Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../../reference/globals#server) global APIs accept `port` and `securePort` options, so components replicated this for simpler pass-through. - -#### Protocol Extension API - -A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_, and _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods have identical `options` object parameter, and can both return a Resource Extension (i.e. an object containing one or more of the methods listed above). - -##### `start(options): ResourceExtension | Promise` - -##### `startOnMainThread(options): ResourceExtension | Promise` - -Parameters: - -- `options` - `Object` - An object representation of the extension's configuration options. 
- -Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api) diff --git a/versioned_docs/version-4.4/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.4/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index c4254430..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created Harper will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." -} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. 
- -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.4/developers/operations-api/bulk-operations.md deleted file mode 100644 index aef33230..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into Harper - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running Harper - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/clustering-nats.md b/versioned_docs/version-4.4/developers/operations-api/clustering-nats.md deleted file mode 100644 index 9e7b8211..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/clustering-nats.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering using NATS ---- - -# Clustering using NATS - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table -- - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `node_name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about [Harper clustering here](../../reference/clustering). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/versioned_docs/version-4.4/developers/operations-api/clustering.md b/versioned_docs/version-4.4/developers/operations-api/clustering.md deleted file mode 100644 index 8533de30..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/clustering.md +++ /dev/null @@ -1,345 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -The following operations are available for configuring and managing [Harper replication](../replication/). 
- -_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](./clustering-nats) _**documentation.**_ - -## Add Node - -Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided a fully replicating system will be created. [Learn more about adding nodes here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add -- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. This will allow the Harper default self-signed certificates to be accepted. Defaults to `true` -- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials -- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used everytime a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate based authorization is much more secure and safe, and avoids the need for storing credentials. Defaults to `false`. -- `subscriptions` _(optional)_ - The relationship created between nodes. If not provided a fully replicated cluster will be setup. 
Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate - - `table` - the table to replicate - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'server-two' to cluster" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance in the cluster. - -_Operation is restricted to super_user roles only_ - -_Note: will attempt to add the node if it does not exist_ - -- `operation` _(required)_ - must always be `update_node` -- `hostname` _(required)_ - the `hostname` of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'server-two'" -} -``` - ---- - -## Remove Node - -Removes a Harper node from the cluster and stops replication, [Learn more about remove node here](../replication/). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are removing - -### Body - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'server-two' from cluster" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. - -`database_sockets` shows the actual websocket connections that exist between nodes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "type": "cluster-status", - "connections": [ - { - "url": "wss://server-two:9925", - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ], - "name": "server-two", - "database_sockets": [ - { - "database": "dev", - "connected": true, - "latency": 0.84197798371315, - "threadId": 1, - "nodes": ["server-two"] - } - ] - } - ], - "node_name": "server-one", - "is_enabled": true -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object following the `add_node` schema. 
- -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password2" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "subscribe": true, - "publish": false - } - ] - }, - { - "hostname": "server-three", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password3" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Cluster Set Routes - -Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties. - -### Body - -```json -{ - "operation": "cluster_set_routes", - "routes": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets the replication routes from the Harper config file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -[ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } -] -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/components.md b/versioned_docs/version-4.4/developers/operations-api/components.md deleted file mode 100644 index a087cc13..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/components.md +++ /dev/null @@ -1,511 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a predefined template. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -If deploying with the `payload` option, Harper will decrypt the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory. - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registerd packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. In addition to tags, you can also reference branches or commit numbers. 
Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have an installed SSH keys on the server: - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. Must be a string -- `restart` _(optional)_ - must be either a boolean or the string `rolling`. If set to `rolling`, a rolling restart will be triggered after the component is deployed, meaning that each node in the cluster will be sequentially restarted (waiting for the last restart to start the next). 
If set to `true`, the restart will not be rolling, all nodes will be restarted in parallel. If `replicated` is `true`, the restart operations will be replicated across the cluster. -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. -- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. This can be used to install dependencies with pnpm or yarn, for example, like: `"install_command": "npm install -g pnpm && pnpm install"` - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template", - "replicated": true -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. 
**If just `project` is provided it will delete all of that project's local files and folders**
466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. 
Defaults to `utf8` -- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` - -## Add SSH Key - -Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_ssh_key` -- `name` _(required)_ - the name of the key -- `key` _(required)_ - the private key contents. Line breaks must be delimited with -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with -- `replicated` _(optional)_ - if true, Harper will replicate the key to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "add_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nfake\nkey\n-----END OPENSSH PRIVATE KEY-----", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Added ssh key: harperdb-private-component" -} -``` - -### Generated Config and Deploy Component "package" string examples - -``` -#harperdb-private-component -Host harperdb-private-component.github.com - HostName github.com - User git - IdentityFile /hdbroot/ssh/harperdb-private-component.key - IdentitiesOnly yes -``` - -``` -"package": "git+ssh://git@:.git#semver:v1.2.3" - -"package": "git+ssh://git@harperdb-private-component.github.com:HarperDB/harperdb-private-component.git#semver:v1.2.3" -``` - -Note that `deploy_component` with a package uses `npm install` so the url must be a valid npm format url. The above is an example of a url using a tag in the repo to install. - -## Update SSH Key - -Updates the private key contents of an existing SSH key. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_ssh_key` -- `name` _(required)_ - the name of the key to be updated -- `key` _(required)_ - the private key contents. Line breaks must be delimited with -- `replicated` _(optional)_ - if true, Harper will replicate the key update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "update_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Updated ssh key: harperdb-private-component" -} -``` - -## Delete SSH Key - -Deletes a SSH key. This will also remove it from the generated SSH config. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_ssh_key` -- `name` _(required)_ - the name of the key to be deleted -- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "name": "harperdb-private-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Deleted ssh key: harperdb-private-component" -} -``` - -## List SSH Keys - -List off the names of added SSH keys - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_ssh_keys` - -### Body - -```json -{ - "operation": "list_ssh_keys" -} -``` - -### Response: 200 - -```json -[ - { - "name": "harperdb-private-component" - } -] -``` - -_Note: Additional SSH keys would appear as more objects in this array_ - -## Set SSH Known Hosts - -Sets the SSH known_hosts file. This will overwrite the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_ssh_known_hosts` -- `known_hosts` _(required)_ - The contents to set the known_hosts to. Line breaks must be delimite d with -- `replicated` _(optional)_ - if true, Harper will replicate the known hosts to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "set_ssh_known_hosts", - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Known hosts successfully set" -} -``` - -## Get SSH Known Hosts - -Gets the contents of the known_hosts file - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_ssh_known_hosts` - -### Body - -```json -{ - "operation": "get_ssh_known_hosts" -} -``` - -### Response: 200 - -```json -{ - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/custom-functions.md b/versioned_docs/version-4.4/developers/operations-api/custom-functions.md deleted file mode 100644 index 37b45ba8..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,277 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDStudio uses this call to render the file content in its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. 
- -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string represenation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.4/developers/operations-api/databases-and-tables.md deleted file mode 100644 index 7c17fb4d..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts about 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. 
If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). - -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. 
- -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. 
The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/index.md b/versioned_docs/version-4.4/developers/operations-api/index.md deleted file mode 100644 index 55bb6709..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/index.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../../deployments/configuration#operationsapi), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](./security#basic-auth) or [JWT authentication](./security#jwt-auth). 
For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Clustering with NATS](operations-api/clustering-nats) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [Utilities](operations-api/utilities) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/jobs.md b/versioned_docs/version-4.4/developers/operations-api/jobs.md deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/jobs.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. 
- -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/logs.md 
b/versioned_docs/version-4.4/developers/operations-api/logs.md deleted file mode 100644 index 52e52740..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/logs.md +++ /dev/null @@ -1,732 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read Harper Log - -Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. 
By default, will maintain `hdb.log` order - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - 
"__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 
1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 
1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.4/developers/operations-api/nosql-operations.md deleted file mode 100644 index 11a99ab6..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,384 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `search_attribute` _(required)_ - attribute you wish to search, can be any attribute -- `search_value` _(required)_ - value you wish to search - wild cards are allowed -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "search_attribute": "owner_name", - "search_value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "dog_name": "Penny" - }, - { - "id": 6, - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. 
The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `search_attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_type` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_value` _(required)_ - case-sensitive value you wish to search. If the `search_type` is `between` then use an array of two values to search between (both inclusive) - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "search_attribute": "age", - "search_type": "between", - "search_value": [5, 8] - }, - { - "search_attribute": "weight_lbs", - "search_type": "greater_than", - "search_value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "search_attribute": "adorable", - "search_type": "equals", - "search_value": true - }, - { - "search_attribute": "lovable", - "search_type": "equals", - "search_value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.4/developers/operations-api/quickstart-examples.md 
deleted file mode 100644 index a6c8f637..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using the Harper Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, let's create a table to store all our employees' dogs. We'll call this table `dog`. - -Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/registration.md b/versioned_docs/version-4.4/developers/operations-api/registration.md deleted file mode 100644 index 
28c6a0e9..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/registration.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the Harper instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Get Fingerprint - -Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -Sets the Harper license as generated by Harper License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/sql-operations.md b/versioned_docs/version-4.4/developers/operations-api/sql-operations.md deleted file mode 100644 index 4b7076bb..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. 
The SELECT statement is used to query data from the database. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/token-authentication.md b/versioned_docs/version-4.4/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. - -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. 
- -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2
UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqkwsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.4/developers/operations-api/users-and-roles.md deleted file mode 100644 index 91f222b9..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. 
Harper will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your Harper instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.4/developers/operations-api/utilities.md b/versioned_docs/version-4.4/developers/operations-api/utilities.md deleted file mode 100644 index 6d24031c..00000000 --- a/versioned_docs/version-4.4/developers/operations-api/utilities.md +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: Utilities ---- - -# Utilities - -## Restart - -Restarts the Harper instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified Harper service. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` -- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` - ---- - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. - -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Install Node Modules - -:::warning Deprecated -This operation is deprecated, as it is handled automatically by [deploy_component](./components#deploy-component) and [restart](#restart). -::: - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must ba an array of custom functions projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` - ---- - -## Set Configuration - -Modifies the Harper configuration file parameters. Must follow with a [restart](#restart) or [restart_service](#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the Harper configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "replication": { - "hostname": "node1", - "databases": "*", - "routes": null, - "url": "wss://127.0.0.1:9925" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - 
"requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` - ---- - -## Add Certificate - -Adds or updates a certificate in the `hdb_certificate` system table. -If a `private_key` is provided it will **not** be stored in `hdb_certificate`, it will be written to file in `/keys/`. -If a `private_key` is not passed the operation will search for one that matches the certificate. If one is not found an error will be returned. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_certificate` -- `name` _(required)_ - a unique name for the certificate -- `certificate` _(required)_ - a PEM formatted certificate string -- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority -- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for -- `private_key` _(optional)_ - a PEM formatted private key string - -### Body - -```json -{ - "operation": "add_certificate", - "name": "my-cert", - "certificate": "-----BEGIN CERTIFICATE-----ZDFAay... -----END CERTIFICATE-----", - "is_authority": false, - "private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... -----END RSA PRIVATE KEY-----" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added certificate: my-cert" -} -``` - ---- - -## Remove Certificate - -Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_certificate` -- `name` _(required)_ - the name of the certificate - -### Body - -```json -{ - "operation": "remove_certificate", - "name": "my-cert" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed my-cert" -} -``` - ---- - -## List Certificates - -Lists all certificates in the `hdb_certificate` system table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_certificates` - -### Body - -```json -{ - "operation": "list_certificates" -} -``` - -### Response: 200 - -```json -[ - { - "name": "HarperDB-Certificate-Authority-node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... S34==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": true, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "serial_number": "5235345", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - }, - { - "name": "node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 
5bv==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": false, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", - "serial_number": "5243646", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - } -] -``` diff --git a/versioned_docs/version-4.4/developers/real-time.md b/versioned_docs/version-4.4/developers/real-time.md deleted file mode 100644 index 9c5c79e4..00000000 --- a/versioned_docs/version-4.4/developers/real-time.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -Harper is a database, not a generic broker, and therefore highly adept at handling _structured_ data. 
Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -Harper supports MQTT with its `mqtt` server module and Harper supports MQTT over standard TCP sockets or over WebSockets. This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). 
- -#### Capabilities - -Harper's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In Harper topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". - -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. 
The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -Harper supports QoS 0 and 1 for publishing and subscribing. - -Harper supports multi-level topics, both for subscribing and publishing. Harper also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -#### Events - -JavaScript components can also listen for MQTT events. This is available on the server.mqtt.events object. For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do: - -```javascript -server.mqtt.events.on('connected', (session, socket) => { - console.log('client connected with id', session.clientId); -}); -``` - -The following MQTT events are available: - -- `connection` - When a client initially establishes a TCP or WS connection to the server -- `connected` - When a client establishes an authorized MQTT connection -- `auth-failed` - When a client fails to authenticate -- `disconnected` - When a client disconnects from the server - -### Ordering - -Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. 
Harper is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, Harper does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). - -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as the being the latest and retained message (#2 in this case). 
- -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of a "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. 
For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | :heavy_check_mark: | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties retain handling | :heavy_check_mark: | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.4/developers/replication/index.md b/versioned_docs/version-4.4/developers/replication/index.md deleted file mode 100644 index 45bcd6c8..00000000 --- 
a/versioned_docs/version-4.4/developers/replication/index.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -title: Replication/Clustering ---- - -# Replication/Clustering - -Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops. - -### Replication Overview - -Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe. - -### Replication Configuration - -To connect your nodes, you need to provide hostnames or URLs for the nodes to connect to each other. This can be done via configuration or through operations. To configure replication, you can specify connection information the `replication` section of the [harperdb-config.yaml](../../deployments/configuration). Here, you can specify the host name of the current node, and routes to connect to other nodes, for example: - -```yaml -replication: - hostname: server-one - routes: - - server-two - - server-three -``` - -In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. 
Routes to other nodes can also be configured with URLs or ports: - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 -``` - -You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form, you would also need to provide the necessary credentials for the operation, see the section on securing connections for more details): - -```json -{ - "operation": "add_node", - "hostname": "server-two" -} -``` - -These operations will also dynamically generating certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node. - -Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally). - -By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases: - -```yaml -replication: - databases: - - data - - system -``` - -By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables. 
However, you can also configure replication for individual tables, and disable and exclude replication for specific tables in a database by setting `replicate` to `false` in the table definition: - -```graphql -type LocalTableForNode @table(replicate: false) { - id: ID! - name: String! -} -``` - -You can also control which nodes data is replicated to, and how many nodes data is replicated to. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated to with the [sharding configuration and APIs](replication/sharding). - -By default replication will connect on the operations API network interface/port (9925 by default). You can configure the replication port in the `replication` section. For example, to change the replication port to 9930: - -```yaml -replication: - securePort: 9930 -``` - -This will change the replication port to 9930 and the operations API port will be on a separate port, remaining on 9925. - -### Securing Connections - -Harper supports the highest levels of security through public key infrastructure based security and authorization. Depending on your security configuration, you can configure Harper in several different ways to build a connected cluster. - -#### Provide your own certificates - -If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or Digicert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config: - -``` -replication - enableRootCAs: true -``` - -And then just make sure the certificate’s common name (CN) matches the node's hostname. 
- -#### Setting Up Custom Certificates - -There are two ways to configure Harper with your own certificates: - -1. Use the `add_certificate` operation to upload them. -1. Or, specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file. - -If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key. If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.\ -\ -Example configuration: - -```yaml -tls: - certificate: /path/to/certificate.pem - certificateAuthority: /path/to/ca.pem - privateKey: /path/to/privateKey.pem -``` - -With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes. - -You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release. - -To enable the root certificates set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file: - -```yaml -replication: - enableRootCAs: true -``` - -#### Cross-generated certificates - -Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. 
To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing. \ -\ -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection. - -If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process: - -- It creates a certificate signing request (CSR), sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA). -- The signed certificate is stored for future connections between the nodes, ensuring secure communication. - -**Important:** Your credentials are not stored—they are discarded immediately after use. - -You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates. - -Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer. - -#### Removing Nodes - -Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. 
For example: - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -#### Insecure Connection IP-based Authentication - -You can completely disable secure connections and use IP addresses to authenticate nodes with each other. This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication within an insecure port, either by [configuring the operations API](../../deployments/configuration) to run on an insecure port or replication to run on an insecure port. And then set up IP-based routes to connect to other nodes: - -```yaml -replication: - port: 9930 - routes: - - 127.0.0.2 - - 127.0.0.3 -``` - -Note that in this example, we are using loop back addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development. - -#### Explicit Subscriptions - -#### Managing Node Connections and Subscriptions in Harper - -By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. But if you want more control, you can manage these connections manually by explicitly subscribing to nodes. This is useful for advanced configurations, testing, or debugging. - -#### Important Notes on Explicit Subscriptions - -If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure. - -#### How to Subscribe to Nodes - -To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions. 
For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node. - -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": false - } - ] -} -``` - -To update an explicit subscription you can use the [`update_node` operation](./operations-api/clustering). - -Here we are updating the subscription to receive transactions on the `dev.my-table` table from the `server-two` node. - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ] -} -``` - -#### Monitoring Replication - -You can monitor the status of replication through the operations API. You can use the [`cluster_status` operation](./operations-api/clustering) to get the status of replication. For example: - -```json -{ - "operation": "cluster_status" -} -``` - -#### Database Initial Synchronization and Resynchronization - -When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data. - -You may also specify a `start_time` in the `add_node` to specify that when a database connects, that it should not download the entire database, but only data since a given starting time. 
- -**Advanced Configuration** - -You can also check the configuration of the replication system, including the current known nodes and certificates, by querying the hdb_nodes and hdb_certificate table: - -```json -{ - "operation": "search_by_value", - "database": "system", - "table": "hdb_nodes", - "search_attribute": "name", - "search_value": "*" -} -``` diff --git a/versioned_docs/version-4.4/developers/replication/sharding.md b/versioned_docs/version-4.4/developers/replication/sharding.md deleted file mode 100644 index 17065e14..00000000 --- a/versioned_docs/version-4.4/developers/replication/sharding.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Sharding ---- - -Harper's replication system supports various levels of replication or sharding. Harper can be configured or set up to replicate to different data to different subsets of nodes. This can be used facilitate horizontally scalability of storage and write performance, while maintaining optimal strategies of data locality and data consistency. When sharding is configured, Harper will replicate data to only a subset of nodes, based on the sharding configuration, and can then retrieve data from the appropriate nodes as needed to fulfill requests for data. - -## Configuration - -By default, Harper will replicate all data to all nodes. However, replication can easily be configured for "sharding", or storing different data in different locations or nodes. The simplest way to configure sharding and limit replication to improve performance and efficiency is to configure a replication-to count. This will limit the number of nodes that data is replicated to. 
For example, to specify that writes should replicate to 2 other nodes besides the node that first stored the data, you can set the `replicateTo` to 2 in the `replication` section of the `harperdb-config.yaml` file: - -```yaml -replication: - replicateTo: 2 -``` - -This will ensure that data is replicated to two other nodes, so that each record will be stored on three nodes in total. - -With a sharding configuration (or customization below) in place, requests will for records that don't reside on the server handling requests will automatically be forwarded to the appropriate node. This will be done transparently, so that the client will not need to know where the data is stored. - -## Replication Control with Headers - -With the REST interface, replication levels and destinations can also specified with the `X-Replicate-To` header. This can be used to indicate the number of additional nodes that data should be replicated to, or to specify the nodes that data should be replicated to. The `X-Replicate-To` header can be used with the `POST` and `PUT` methods. This header can also specify if the response should wait for confirmation from other nodes, and how many, with the `confirm` parameter. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: 2;confirm=1 - -... -``` - -You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: node1,node2 -``` - -(This can also be used with the `confirm` parameter.) - -## Replication Control with Operations - -Likewise, you can specify replicateTo and confirm parameters in the operation object when using the Harper API. 
For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following operation object: - -```jsonc -{ - "operation": "update", - "schema": "dev", - "table": "MyTable", - "hashValues": [3], - "record": { - "name": "John Doe", - }, - "replicateTo": 2, - "replicatedConfirmation": 1, -} -``` - -or you can specify nodes: - -```jsonc -{ - // ... - "replicateTo": ["node-1", "node-2"], - // ... -} -``` - -## Programmatic Replication Control - -Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. For example, you can define a put method: - -```javascript -class MyTable extends tables.MyTable { - put(record) { - const context = this.getContext(); - context.replicateTo = 2; // or an array of node names - context.replicatedConfirmation = 1; - return super.put(record); - } -} -``` - -## Custom Sharding - -You can also define a custom sharding strategy by specifying a function to compute the "residency" or location of where records should be stored and reside. To do this we use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the `id` field, you can use the following code: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? ['node1'] : ['node2']; -}); -``` - -With this approach, the record metadata, which includes the residency information, and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function. 
- -### Custom Sharding By Primary Key - -Alternately you can define a custom sharding strategy based on the primary key alone. This allows records to be retrieved without needing access to the record data or metadata. With this approach, data will only be replicated to the nodes specified by the residency function (the record metadata doesn't need to replicated to all nodes). To do this, you can use the `setResidencyById` method, providing a function that will determine the residency of each record based on the primary key. The function you provide will be called with the primary key, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the primary key, you can use the following code: - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? ['node1'] : ['node2']; -}); -``` - -### Disabling Cross-Node Access - -Normally sharding allows data to be stored in specific nodes, but still allows access to the data from any node. However, you can also disable cross-node access so that data is only returned if is stored on the node where it is accessed. To do this, you can set the `replicateFrom` property on the context of operation to `false`: - -```jsonc -{ - "operation": "search_by_id", - "table": "MyTable", - "ids": [3], - "replicateFrom": false, -} -``` - -Or use a header with the REST API: - -```http -GET /MyTable/3 -X-Replicate-From: none -``` diff --git a/versioned_docs/version-4.4/developers/rest.md b/versioned_docs/version-4.4/developers/rest.md deleted file mode 100644 index 8273e8c8..00000000 --- a/versioned_docs/version-4.4/developers/rest.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: REST ---- - -# REST - -## REST - -Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. 
The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](./applications/) and [defining schemas](./applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](./applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). -- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -### GET - -These can be used to retrieve individual records or perform searches. 
This is handled by the Resource method `get()` (and can be overridden). - -#### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -##### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -#### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -#### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. - -### PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -#### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. 
This is handled by the Resource method `put()`. - -### DELETE - -This can be used to delete a record or records. - -### `DELETE /my-resource/` - -This will delete a record with the given primary key. This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -### `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -### POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are expost\ed as endpoints, this also can be used to create new records. - -#### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -### Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. 
If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. For example, if we are looking for a listing date greater than `2017-03-08T09:00:00.000Z` we must escape the colons as `%3A`: - -``` -GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z -``` - -You can also search for attributes that start with a specific string, by using the == comparator and appending a `*` to the attribute value: - -```http -GET /Product/?name==Keyboard* -``` - -**Chained Conditions** - -You can also specify that a range condition must be met for a single attribute value by chaining conditions. This is done by omitting the name in the name-value pair. For example, to find products with a price between 100 and 200, you could write: - -```http -GET /Product/?price=gt=100<=200 -``` - -Chaining can be used to combined `gt` or `ge` with `lt` or `le` to specify a range of values. Currently, no other types of chaining are supported. - -Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters, to ensure operators are passed through without manipulation. 
- -Here is a full list of the supported FIQL-style operators/comparators: - -- `==`: equal -- `=lt=`: less than -- `=le=`: less than or equal -- `=gt=`: greater than -- `=ge=`: greater than or equal -- `=ne=`, !=: not equal -- `=ct=`: contains the value (for strings) -- `=sw=`, `==*`: starts with the value (for strings) -- `=ew=`: ends with the value (for strings) -- `=`, `===`: strict equality (no type conversion) -- `!==`: strict inequality (no type conversion) - -#### Unions - -Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. For example, to return any product a rating of `5` _or_ a `featured` attribute that is `true`, we could write: - -```http -GET /Product/?rating=5|featured=true -``` - -#### Grouping of Operators - -Multiple conditions with different operators can be combined with grouping of conditions to indicate the order of operation. Grouping conditions can be done with parenthesis, with standard grouping conventions as used in query and mathematical expressions. For example, a query to find products with a rating of 5 OR a price between 100 and 200 could be written: - -```http -GET /Product/?rating=5|(price=gt=100&price=lt=200) -``` - -Grouping conditions can also be done with square brackets, which function the same as parenthesis for grouping conditions. The advantage of using square brackets is that you can include user provided values that might have parenthesis in them, and use standard URI component encoding functionality, which will safely escape/encode square brackets, but not parenthesis. 
For example, if we were constructing a query for products with a rating of a 5 and matching one of a set of user provided tags, a query could be built like: - -```http -GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient] -``` - -And the tags could be safely generated from user inputs in a tag array like: - -```javascript -let url = `/Product/?rating=5[${tags.map(encodeURIComponent).join('|')}]`; -``` - -More complex queries can be created by further nesting groups: - -```http -GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true] -``` - -### Query Calls - -Harper has several special query functions that use "call" syntax. These can be included in the query string as its own query entry (separated from other query conditions with an `&`). These include: - -#### `select(properties)` - -This function allows you to specify which properties should be included in the responses. This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. -- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/references records. - -To get a list of product names with a category of software: - -```http -GET /Product/?category=software&select(name) -``` - -#### `limit(start,end)` or `limit(end)` - -This function specifies a limit on the number of records returned, optionally providing a starting offset. 
- -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -#### `sort(property)`, `sort(+property,-property,...)` - -This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If the multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties. - -For example, to sort by product name (in ascending order): - -```http -GET /Product?rating=gt=3&sort(+name) -``` - -To sort by rating in ascending order, then by price in descending order for products with the same rating: - -```http -GET /Product?sort(+rating,-price) -``` - -## Relationships - -Harper supports relationships in its data models, allowing for tables to define a relationship with data from other tables (or even itself) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even with ordered relationships). These relationships are defined in the schema, and then can easily be queried through chained attributes that act as "join" queries, allowing related attributes to referenced in conditions and selected for returned results. - -### Chained Attributes and Joins - -To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. 
Most importantly, this provides Harper's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema: - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - brandId: ID @indexed - brand: Brand @relationship(from: "brandId") -} -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: "brandId") -} -``` - -And then you could query a product by brand name: - -```http -GET /Product/?brand.name=Microsoft -``` - -This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`. - -The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that: - -```http -GET /Brand/?products.name=Keyboard -``` - -This would return any `Brand` with at least one product with a name `"Keyboard"`. Note, that both of these queries are effectively acting as an "INNER JOIN". - -#### Chained/Nested Select - -Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand) -``` - -We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand{name}) -``` - -Or to specify multiple sub-attributes, we can comma delimit them. 
Note that selects can "join" to another table without any constraint/filter on the related/joined table: - -```http -GET /Product/?name=Keyboard&select(name,brand{name,id}) -``` - -When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand. - -#### Many-to-many Relationships (Array of Foreign Keys) - -Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, each ) - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - resellerIds: [ID] @indexed - resellers: [Reseller] @relationship(from: "resellerId") -} -type Reseller @table { - id: ID @primaryKey - name: String - ... -} -``` - -The product record can then hold an array of the reseller ids. When the `reseller` property is accessed (either through code or through select, conditions), the array of ids is resolved to an array of reseller records. We can also query through the resellers relationships like with the other relationships. For example, to query the products that are available through the "Cool Shop": - -```http -GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id}) -``` - -One of the benefits of using an array of foreign key values is that the this can be manipulated using standard array methods (in JavaScript), and the array can dictate an order to keys and therefore to the resulting records. 
For example, you may wish to define a specific order to the resellers and how they are listed (which comes first, last): - -```http -PUT /Product/123 -Content-Type: application/json - -{ "id": "123", "resellerIds": ["first-reseller-id", "second-reseller-id", "last-reseller-id"], -...} -``` - -#### Type Conversion - -Queries parameters are simply text, so there are several features for converting parameter values to properly typed values for performing correct searches. For the FIQL comparators, which includes `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, `=gt=`, the parser will perform type conversion, according to the following rules: - -- `name==null`: Will convert the value to `null` for searching. -- `name==123`: Will convert the value to a number _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion. 
- -#### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. - -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. 
If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -#### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", data: "BEGIN:VCALENDAR\nVERSION:2.0\n... -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. This also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/versioned_docs/version-4.4/developers/security/basic-auth.md b/versioned_docs/version-4.4/developers/security/basic-auth.md deleted file mode 100644 index 9bc0160c..00000000 --- a/versioned_docs/version-4.4/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -Harper uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -** \_**You do not need to log in separately. 
Basic Auth is added to each HTTP request like create_database, create_table, insert etc… via headers.**\_ ** - -A header is added to each HTTP request. The header key is **"Authorization"** the header value is **"Basic <<your username and password buffer token>>"** - -## Authentication in Harper Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for Harper. - -_Note: This function uses btoa. Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.4/developers/security/certificate-management.md b/versioned_docs/version-4.4/developers/security/certificate-management.md deleted file mode 100644 index 43209e05..00000000 --- a/versioned_docs/version-4.4/developers/security/certificate-management.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for Harper external facing APIs. 
For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -By default Harper will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable Harper HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set Harper will default to the values in the `tls` section. 
- -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### mTLS - -Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section: - -```yaml - -http: - mtls: true - ... -tls: - certificateAuthority: ~/hdb/keys/ca.pem - ... -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. 
diff --git a/versioned_docs/version-4.4/developers/security/configuration.md b/versioned_docs/version-4.4/developers/security/configuration.md deleted file mode 100644 index 19251614..00000000 --- a/versioned_docs/version-4.4/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper. - -## CORS - -Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains. If you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -Harper provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -This default port can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use. 
- -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. Use operation `harperdb restart` from Harper Operations API.** diff --git a/versioned_docs/version-4.4/developers/security/index.md b/versioned_docs/version-4.4/developers/security/index.md deleted file mode 100644 index 723db452..00000000 --- a/versioned_docs/version-4.4/developers/security/index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Security ---- - -# Security - -Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [mTLS Authentication](security/mtls-auth) -- [Configuration](security/configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.4/developers/security/jwt-auth.md b/versioned_docs/version-4.4/developers/security/jwt-auth.md deleted file mode 100644 index 832373e4..00000000 --- a/versioned_docs/version-4.4/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -Harper uses token based authentication with JSON Web Tokens, JWTs. 
- -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. - -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. 
In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.4/developers/security/mtls-auth.md b/versioned_docs/version-4.4/developers/security/mtls-auth.md deleted file mode 100644 index 375ec927..00000000 --- a/versioned_docs/version-4.4/developers/security/mtls-auth.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -Harper supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. diff --git a/versioned_docs/version-4.4/developers/security/users-and-roles.md b/versioned_docs/version-4.4/developers/security/users-and-roles.md deleted file mode 100644 index 1bf0b91b..00000000 --- a/versioned_docs/version-4.4/developers/security/users-and-roles.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in Harper - -Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. 
At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within Harper. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a Harper instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. 
- -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. - -Example JSON for `add_role` request - -```jsonc -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true, - }, - ], - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [], - }, - }, - }, - }, -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. 
If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles, blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). - -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1. 
If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2). - - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. 
- -## Role-Based Operation Restrictions - -The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles. - -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| 
drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | :-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed Harper as `<>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. 
User_b may not have access to the hdb files Harper needs. This also keeps Harper more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. diff --git a/versioned_docs/version-4.4/index.md b/versioned_docs/version-4.4/index.md deleted file mode 100644 index 92c48eef..00000000 --- a/versioned_docs/version-4.4/index.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: Harper Docs ---- - -# Harper Docs - -:::info - -### Get the Most Out of Harper - -Join our Discord to access expert support, collaborate with Harper’s core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -## What is Harper? Performance, Simplicity, and Scale. - -Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. - -Harper simplifies scaling with clustering and native data replication. At scale, architectures tend to include 4 to 16 redundant, geo-distributed nodes located near every user population center. This ensures that every user experiences minimal network latency and maximum reliability in addition to the already rapid server responses. - -![](/img/v4.4/harperstack.jpg) - -## Understanding the Paradigm Shift - -Have you ever combined MongoDB with Redis, Next.js with Postgres, or perhaps Fastify with anything else? The options seem endless. It turns out that the cost of serialization, network hops, and intermediary processes in these systems adds up to 50% of the total system resources used (often more). Not to mention the hundreds of milliseconds of latency they can add. 
- -What we realized is that networking systems together in this way is inefficient and only necessary because a fused technology did not exist. So, we built Harper, a database fused with a complete JavaScript application system. It’s not only orders of magnitude more performant than separated systems, but it’s also easier to deploy and manage at scale. - -## Build With Harper - -Start by running Harper locally with [npm](https://www.npmjs.com/package/harperdb) or [Docker](https://hub.docker.com/r/harperdb/harperdb). - -Since technology tends to be built around the storage, processing, and transfer of data, start by [defining your schema](./developers/applications/#creating-our-first-table) with the `schema.graphql` file in the root of the application directory. - -If you would like to [query](./developers/applications/#adding-an-endpoint) this data, add the `@export` directive to our data schema and test out the [REST](./developers/rest), [MQTT](./developers/real-time#mqtt), or [WebSocket](./developers/real-time#websockets) endpoints. - -When you are ready for something a little more advanced, start [customizing your application](./developers/applications/#custom-functionality-with-javascript). - -Finally, when it’s time to deploy, explore [replication](./developers/replication/) between nodes. - -If you would like to jump into the most advanced capabilities, learn about [components](developers/components/index.md). - -For a more comprehensive deep dive, take a look at our [Getting Started Guide](/learn/). - -:::warning -Need help? Please don’t hesitate to [reach out](https://www.harpersystems.dev/contact). -::: - -## Popular Use Cases - -With so much functionality built in, the use cases span nearly all application systems. Some of the most popular are listed below, motivated by new levels of performance and system simplicity. 
- -### Online Catalogs & Content Delivery - -For use cases like e-commerce, real estate listing, and content-oriented sites, Harper’s breakthroughs in performance and distribution pay dividends in the form of better SEO and higher conversion rates. One common implementation leverages Harper’s [Next.js Component](https://github.com/HarperDB/nextjs) to host modern, performant frontend applications. Other implementations leverage the built-in caching layer and JavaScript application system to [server-side render pages](https://www.harpersystems.dev/development/tutorials/server-side-rendering-with-multi-tier-cache) that remain fully responsive because of built-in WebSocket connections. - -### Data Delivery Networks - -For use cases like real-time sports updates, flight tracking, and zero-day software update distribution, Harper is rapidly gaining popularity. Harper’s ability to receive and broadcast messages while simultaneously handling application logic and data storage streamlines operations and eliminates the need for multiple separate systems. To build an understanding of our messaging system function, refer to our [real-time documentation](./developers/real-time). - -### Edge Inference Systems - -Capturing, storing, and processing real-time data streams from client and IoT systems typically requires a stack of technology. Harper’s selective data replication and self-healing connections make for an ideal multi-tier system where edge and cloud systems both run Harper, making everything more performant. - -[We’re happy](https://www.harpersystems.dev/contact) to walk you through how to do this. - -## Getting Started - -
-
-

- - Quickstart - -

-

- Get up and running with Harper -

-
-
-

- - Quick Install Harper - -

-

- Run Harper on your own hardware 

-
-
-

- - Try Harper Cloud - -

-

- Spin up an instance in minutes to get going fast -

-
-
- -## Building with Harper - -
-
-

- - Harper Applications - -

-

- Build a fully featured Harper Component with custom functionality 

-
-
-

- - REST Queries - -

-

- The recommended HTTP interface for data access, querying, and manipulation -

-
-
-

- - Operations API - -

-

- Configure, deploy, administer, and control your Harper instance -

-
-
- -
-
-

- - Clustering & Replication - -

-

- The process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. -

-
-
-

- - Explore the Harper Studio - -

-

- The web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface. 

-
-
diff --git a/versioned_docs/version-4.4/reference/_category_.json b/versioned_docs/version-4.4/reference/_category_.json deleted file mode 100644 index d6302ac2..00000000 --- a/versioned_docs/version-4.4/reference/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Reference", - "position": 4, - "link": { - "type": "generated-index", - "title": "Reference Documentation", - "description": "Reference documentation and technical specifications", - "keywords": ["reference", "specifications"] - } -} diff --git a/versioned_docs/version-4.4/reference/analytics.md b/versioned_docs/version-4.4/reference/analytics.md deleted file mode 100644 index 0ab51c67..00000000 --- a/versioned_docs/version-4.4/reference/analytics.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -Harper provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the Harper analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. 
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -The following are general resource usage statistics that are tracked: - -- `memory` - This includes RSS, heap, buffer and external data usage. -- `utilization` - How much of the time the worker was processing requests. -- mqtt-connections - The number of MQTT connections. - -The following types of information is tracked for each HTTP request: - -- `success` - How many requests returned a successful response (20x response code). TTFB - Time to first byte in the response to the client. -- `transfer` - Time to finish the transfer of the data to the client. -- bytes-sent - How many bytes of data were sent to the client. - -Requests are categorized by operation name, for the operations API, by the resource (name) with the REST API, and by command for the MQTT interface. diff --git a/versioned_docs/version-4.4/reference/architecture.md b/versioned_docs/version-4.4/reference/architecture.md deleted file mode 100644 index 4155d5ff..00000000 --- a/versioned_docs/version-4.4/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -Harper's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. 
- -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.4/reference/clustering/certificate-management.md b/versioned_docs/version-4.4/reference/clustering/certificate-management.md deleted file mode 100644 index 43839a4b..00000000 --- a/versioned_docs/version-4.4/reference/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that Harper generates are stored in your `/keys/`. 
- -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your Harper cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make Harper start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart Harper. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. 
Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.4/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.4/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 0a8b2a6c..00000000 --- a/versioned_docs/version-4.4/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.4/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.4/reference/clustering/enabling-clustering.md deleted file mode 100644 index 606bc29c..00000000 --- a/versioned_docs/version-4.4/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. 
Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.4/reference/clustering/establishing-routes.md b/versioned_docs/version-4.4/reference/clustering/establishing-routes.md deleted file mode 100644 index 7840a30e..00000000 --- a/versioned_docs/version-4.4/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with. This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.4/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. 
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.4/reference/clustering/index.md b/versioned_docs/version-4.4/reference/clustering/index.md deleted file mode 100644 index fddd3851..00000000 --- a/versioned_docs/version-4.4/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: NATS Clustering ---- - -# NATS Clustering - -Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. - -Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. 
-- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. - -Harper simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.4/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.4/reference/clustering/managing-subscriptions.md deleted file mode 100644 index f043c9d1..00000000 --- a/versioned_docs/version-4.4/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Managing subscriptions ---- - -Tables are replicated when the table is designated as replicating and there is subscription between the nodes. -Tables designated as replicating by default, but can be changed by setting `replicate` to `false` in the table definition: - -```graphql -type Product @table(replicate: false) { - id: ID! - name: String! 
-} -``` - -Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases -should be replicated by default: - -```yaml -replication: - databases: data -``` - -If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate: - -```graphql -type Product @table(replicate: true) { - id: ID! - name: String! -} -``` - -Reading hdb*nodes (what we do \_to* the node, not what the node does). - -The subscription can be set to publish, subscribe, or both. - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `set_node_replication`. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "data", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "database": "data", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/update while the others will be left untouched. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. 
- -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. 
- -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.4/reference/clustering/naming-a-node.md b/versioned_docs/version-4.4/reference/clustering/naming-a-node.md deleted file mode 100644 index 7a512efb..00000000 --- a/versioned_docs/version-4.4/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. 
Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.4/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.4/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 22bc3977..00000000 --- a/versioned_docs/version-4.4/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of Harper running. - -\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster. diff --git a/versioned_docs/version-4.4/reference/clustering/subscription-overview.md b/versioned_docs/version-4.4/reference/clustering/subscription-overview.md deleted file mode 100644 index 66b013db..00000000 --- a/versioned_docs/version-4.4/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. 
_Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.4/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.4/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.4/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.4/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. 
diff --git a/versioned_docs/version-4.4/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.4/reference/clustering/things-worth-knowing.md deleted file mode 100644 index 3e976ef6..00000000 --- a/versioned_docs/version-4.4/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -Harper clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. Subscription topologies can be as simple or as complex as needed. 
- -![](/img/v4.4/clustering/figure6.png) diff --git a/versioned_docs/version-4.4/reference/content-types.md b/versioned_docs/version-4.4/reference/content-types.md deleted file mode 100644 index b7d223f4..00000000 --- a/versioned_docs/version-4.4/reference/content-types.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -Harper supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations. - -:::tip Need a custom content type? - -Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples. - -::: - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and Harper's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/versioned_docs/version-4.4/reference/data-types.md b/versioned_docs/version-4.4/reference/data-types.md deleted file mode 100644 index 9f7d3e79..00000000 --- a/versioned_docs/version-4.4/reference/data-types.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allows for all of Harper supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and is used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. 
- -## Number - -Numbers can be stored as signed integers up to 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type names are supported: - -- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double") -- `Int` - Any integer from -2147483648 to 2147483647 -- `Long` - Any integer from -9007199254740992 to 9007199254740992 -- `BigInt` - Any integer (negative or positive) with less than 300 digits - -Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately. - -## Object/Map - -Objects, or maps, that hold a set of named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in Harper’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in Harper property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. - -## Binary Data - -Binary data can be stored in property values as well. 
JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in Harper. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/versioned_docs/version-4.4/reference/dynamic-schema.md b/versioned_docs/version-4.4/reference/dynamic-schema.md deleted file mode 100644 index 5585f9cd..00000000 --- a/versioned_docs/version-4.4/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. Harper tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -Harper databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. 
By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -Harper tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in Harper operations API. - -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary key; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](./storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. - -**Audit Attributes** - -Harper automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. 
- -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. - -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.4/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.4/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.4/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. 
- -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.4/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.4/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.4/reference/globals.md b/versioned_docs/version-4.4/reference/globals.md deleted file mode 100644 index 68742736..00000000 --- a/versioned_docs/version-4.4/reference/globals.md +++ /dev/null @@ -1,313 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with Harper is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. 
Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -## `tables` - -This is an object that holds all the tables for the default database (called `data`) as properties. Each of these property values is a table class that subclasses the Resource interface and provides access to the table through the Resource interface. For example, you can get a record from a table (in the default database) called 'my-table' with: - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -async function getRecord() { - let record = await MyTable.get(recordId); -} -``` - -It is recommended that you [define a database](/learn/) for all the tables that are required to exist in your application. This will ensure that the tables exist on the `tables` object. Also note that the property names follow a CamelCase convention for use in JavaScript and in the GraphQL Schemas, but these are translated to snake_case for the actual table names, and converted back to CamelCase when added to the `tables` object. - -## `databases` - -This is an object that holds all the databases in Harper, and can be used to explicitly access a table by database name. Each database will be a property on this object, each of these property values will be an object with the set of all tables in that database. The default database, `databases.data` should equal the `tables` export. For example, if you want to access the "dog" table in the "dev" database, you could do so: - -```javascript -import { databases } from 'harperdb'; -const { Dog } = databases.dev; -``` - -## `Resource` - -This is the base class for all resources, including tables and external data sources. 
This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](./resource) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP service. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -### `Request` and `Response` - -The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standard-based APIs to facilitate reuse with modern web code. 
While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resource) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the Harper server environment: - -#### `Request` - -A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties: - -- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. -- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. -- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. -- `protocol` - This is the protocol of the request, like `http` or `https`. -- `data` - This is the deserialized body of the request (based on the type of data specified by `Content-Type` header). -- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). -- `host` - This is the host of the request, like `example.com`. 
-- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`. -- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that sends the cookie in requests will be authenticated as the user that logged in and the session record will be attached to the request. This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. -- `session` - This is the session object that is associated with the current cookie-maintained session. This object is used to store session data for the current session. This is a `Table` record instance, and can be updated by calling `request.session.update({ key: value })` or session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. -- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depend on the layered `Request` call with `Response` return pattern. 
-- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged and can cause problems for middleware, should only be used if you are certain that other server handlers will not attempt to return a different `Response` object. - -#### `Response` - -REST methods can directly return data that is serialized and returned to users, or it can return a `Response` object (or a promise to a `Response`), or it can return a `Response`-like object with the following properties (or again, a promise to it): - -- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. -- `data` - This is the data to be returned of the response. This will be serialized using Harper's content negotiation. -- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). - -#### `HttpOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](./globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. 
- -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. - -* The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -* The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](./globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the Harper Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through [analytics API](./analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. - -### `server.contentTypes` - -Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. - -## `contentTypes` - -Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. 
- -HarperDB uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: - -1. **Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage -2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format - -### Built-in Content Types - -HarperDB includes handlers for common formats: - -- **JSON** (`application/json`) -- **CBOR** (`application/cbor`) -- **MessagePack** (`application/msgpack`) -- **CSV** (`text/csv`) -- **Event-Stream** (`text/event-stream`) -- And more... - -### Custom Content Type Handlers - -You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). The map is keyed by MIME type, with values being handler objects containing these optional properties: - -#### Handler Properties - -- **`serialize(data: any): Buffer | Uint8Array | string`** - Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. - -- **`serializeStream(data: any): ReadableStream`** - Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. - -- **`deserialize(buffer: Buffer | string): any`** - Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. - -- **`deserializeStream(stream: ReadableStream): any`** - Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). - -- **`q: number`** _(default: 1)_ - Quality indicator between 0 and 1 representing serialization fidelity. 
Used in content negotiation to select the best format when multiple options are available. The server chooses the content type with the highest product of client quality × server quality values. - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` diff --git a/versioned_docs/version-4.4/reference/graphql.md b/versioned_docs/version-4.4/reference/graphql.md deleted file mode 100644 index 4c429853..00000000 --- a/versioned_docs/version-4.4/reference/graphql.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: GraphQL Querying ---- - -# GraphQL Querying - -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resource). - -Get started by setting `graphql: true` in `config.yaml`. - -This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. - -> GraphQL querying provides functionality for mapping GraphQL querying functionality to exported resources, and is based on the [GraphQL Over HTTP / GraphQL specifications](https://graphql.github.io/graphql-over-http/draft/#) (it is designed to intuitively map queries to Harper resources, but does not implement the full [specification](https://spec.graphql.org/) of resolvers, subscribers, and mutations). - -Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. 
 - -For example, to request the GraphQL Query: - -```graphql -query GetDogs { - Dog { - id - name - } -} -``` - -The `GET` request would look like: - -```http -GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D -Accept: application/graphql-response+json -``` - -And the `POST` request would look like: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDogs { Dog { id name } }" -} -``` - -> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. - -The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@exported` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resource#query) for more complex queries. - -Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: - -```graphql -query GetDogsAndOwners { - Dog { - id - name - breed - } - - Owner { - id - name - occupation - } -} -``` - -This will return all dogs and owners in the database. And is equivalent to executing two REST queries: - -```http -GET /Dog/?select(id,name,breed) -# and -GET /Owner/?select(id,name,occupation) -``` - -### Request Parameters - -There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` - -1. `query` - _Required_ - The string representation of the GraphQL document. - 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. - 1. i.e. 
GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). - 1. If a shorthand, unnamed, or singular named query is provided, they will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. -1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter -1. `variables` - _Optional_ - A map of variable values to be used for the specified query - -### Type Checking - -The Harper GraphQL Querying system takes many liberties from the GraphQL specification. This extends to how it handles type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. - -In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. - -For example, the variable `$name: String!` states that `name` should be a non-null, string value. - -- If the request does not contain the `name` variable, an error will be returned -- If the request provides `null` for the `name` variable, an error will be returned -- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. -- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. - - If `null` is provided as the variable value, an error will still be returned. - - If the default value does not match the type specified (i.e. `$name: String! 
= 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. -- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. - -The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. - -- Objects will be transformed into properly nested attributes -- Strings and Boolean values are passed through as their AST values -- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. -- List and Enums are not supported. - -### Fragments - -The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. - -For example, in the query - -```graphql -query Get { - Dog { - ...DogFields - } -} - -fragment DogFields on Dog { - name - breed -} -``` - -The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). - -You can literally specify anything in the fragment and it will behave the same way: - -```graphql -fragment DogFields on Any { ... } # this is recommended -fragment DogFields on Cat { ... } -fragment DogFields on Animal { ... } -fragment DogFields on LiterallyAnything { ... 
} -``` - -As an actual example, fragments should be used for composition: - -```graphql -query Get { - Dog { - ...sharedFields - breed - } - Owner { - ...sharedFields - occupation - } -} - -fragment sharedFields on Any { - id - name -} -``` - -### Short Form Querying - -Any attribute can be used as an argument for a query. In this short form, multiple arguments is treated as multiple equivalency conditions with the default `and` operation. - -For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. - -```graphql -query GetDog($id: ID!) { - Dog(id: $id) { - name - breed - owner { - name - } - } -} -``` - -And as a properly formed request: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}", - "variables": { - "id": "0" - } -} -``` - -The REST equivalent would be: - -```http -GET /Dog/?id==0&select(name,breed,owner{name}) -# or -GET /Dog/0?select(name,breed,owner{name}) -``` - -Short form queries can handle nested attributes as well. - -For example, return all dogs who have an owner with the name `"John"` - -```graphql -query GetDog { - Dog(owner: { name: "John" }) { - name - breed - owner { - name - } - } -} -``` - -Would be equivalent to - -```http -GET /Dog/?owner.name==John&select(name,breed,owner{name}) -``` - -And finally, we can put all of these together to create semi-complex, equality based queries! - -The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. - -```graphql -query GetDog($dogName: String!, $ownerName: String!) { - Dog(name: $dogName, owner: { name: $ownerName }) { - name - breed - owner { - name - } - } -} -``` - -### Long Form Querying - -> Coming soon! - -### Mutations - -> Coming soon! - -### Subscriptions - -> Coming soon! - -### Directives - -> Coming soon! 
diff --git a/versioned_docs/version-4.4/reference/headers.md b/versioned_docs/version-4.4/reference/headers.md deleted file mode 100644 index 5c85fc88..00000000 --- a/versioned_docs/version-4.4/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Harper Headers ---- - -# Harper Headers - -All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.4/reference/index.md b/versioned_docs/version-4.4/reference/index.md deleted file mode 100644 index 4c5d867a..00000000 --- a/versioned_docs/version-4.4/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for Harper. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.4/reference/limits.md b/versioned_docs/version-4.4/reference/limits.md deleted file mode 100644 index 97214620..00000000 --- a/versioned_docs/version-4.4/reference/limits.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Harper Limits ---- - -# Harper Limits - -This document outlines limitations of Harper. - -## Database Naming Restrictions - -**Case Sensitivity** - -Harper database metadata (database names, table names, and attribute/column names) are case sensitive. 
Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -Harper limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. - -## Primary Keys - -The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shortest). diff --git a/versioned_docs/version-4.4/reference/query-optimization.md b/versioned_docs/version-4.4/reference/query-optimization.md deleted file mode 100644 index 139b862b..00000000 --- a/versioned_docs/version-4.4/reference/query-optimization.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Query Optimization ---- - -## Query Optimization - -Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. It is important to understand how querying works to help you optimize your queries for the best performance. - -### Query Execution - -At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database and delivering the results based on required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. 
When a field is not indexed and a query specifies a condition on that field, the database must check each potential record to determine if it matches the condition. - -When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default, an `and` operator, matching records must match all conditions), Harper will attempt to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data and then filter based on the remaining conditions. If a condition can search an indexed field, with a selective condition, it will be used before conditions that aren't indexed or aren't as selective. The `search` method includes an `explain` flag that can be used to return a query execution order to understand how the query is being executed. This can be useful for debugging and optimizing queries. - -For a union query, each condition is executed separately and the results are combined/merged. - -### Condition, Operators, and Indexing - -When a query is performed, the conditions specified in the query are evaluated against the data in the database. The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The use of these operators can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions with an indexed field, the database will have to check every record with a full table scan and can be very slow for large datasets (it will get slower as the dataset grows, `O(n)`). 
 - -The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations. This is because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries. - -The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted list of values, so the greater than and less than operators will also utilize indexed fields when possible. If the range is narrow, these operations can be very fast. A wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it quickly finds the correct matching entries in the sorted index. On the other hand, the `contains`, `ends_with`, and not equal (`!=` or `not_equal`) operators cannot leverage the indexes, so they will require a full table scan to find the matching records if they are not used in conjunction with a selective/indexed condition. There is a special case of `!= null` which can use indexes to find non-null records. However, this is generally only helpful for sparse fields where a small subset are non-null values. More generally, operators are more efficient if they are selecting on fields with a high cardinality. - -Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes). 
In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database, and doesn't require cross-referencing to the main records. - -### Relationships/Joins - -Harper supports relationships between tables, allowing for "join" queries. This does result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions which use relationships. Indexed fields and comparators that leverage the ordering are still valuable for performance. It is also important that if a condition on a table is connected to another table's foreign key, that foreign key also be indexed. Likewise, if a query `select`s data from a related table through a foreign key, that foreign key should also be indexed. The same principles of higher cardinality apply here as well, more unique values allow for efficient lookups. - -### Sorting - -Queries can also specify a sort order. This can also significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. A sort order used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as they do to conditions. Sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order. - -### Streaming - -One of the unique and powerful features of Harper's querying functionality is the ability to stream query results. 
When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending the initial data before the entire query is complete (improving time-to-first-byte speed, for example). However, using a sort order on a query with conditions that are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits. diff --git a/versioned_docs/version-4.4/reference/resource.md b/versioned_docs/version-4.4/reference/resource.md deleted file mode 100644 index c981318e..00000000 --- a/versioned_docs/version-4.4/reference/resource.md +++ /dev/null @@ -1,726 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information to during execution. 
Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. This is the general rule for how to interact with resources: - -- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initially access or write data. For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. - - You can subsequently use the instance methods on the returned resource instance to perform additional actions on the record. -- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. - -The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. - -The REST-based API is a little different than traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind, but semantics that attempt to guarantee no existing record or overwrite-only behavior require locks that don't scale well in a distributed database. Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed eventually consistent database.
You can generally think of CRUD operations mapping to REST operations like this: - -- Read - `get` -- Create with a known primary key - `put` -- Create with a generated primary key - `post`/`create` -- Update (Full) - `put` -- Update (Partial) - `patch` -- Delete - `delete` - -The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. Using a path that specifies an ID like `/MyResource/3492` will be mapped to a Resource instance where the instance's ID will be `3492`, and interactions will use the instance methods like `get()`, `put()`, and `post()`. Using the root path (`/MyResource/`) will map to a Resource instance with an ID of `null`, and this represents the collection of all the records in the resource or table. - -You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the Harper JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. 
For example: - -```javascript -export class MyExternalData extends Resource { - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper table to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice.
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../developers/components/reference#extensions). - -### `transaction` - -This provides a function for starting transactions. See the transactions section below for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. 
- -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. 
If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -- Arguments - - `query`: The [Query](#query) object to use for the search -- Return value - - An [AsyncIterable](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/AsyncIterator) of records that match the query - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make sure you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query): Resource|void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated.
The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.put(data)` updates this specific record/resource, not other records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object): Resource|void|Response` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`).
Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?): Resource|void|Response` - -This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete()`) deletes the record from the table as part of the current transaction. - -### `publish(message): Resource|void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query): Resource|void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled.
The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can iterate through with a `for await` loop. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path.
This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to update the current resource. 
This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for the update. - -### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action.
- -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. 
(Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation.
The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instance methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. The Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -...
-// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. 
- -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. 
- -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. 
-- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `parsePath(path, context, query)` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user.
- -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. 
-- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). For example, a complex query might look like: - -For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced joined tables. 
- -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`. - -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify an `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). 
-- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. For example to just return an iterator of the `id`s of object: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition applied first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). 
- -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can direct access or modify properties through standard property syntax. 
For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra 
logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the \`update()\`\` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these change will be saved - } -} -``` - -If you need to delete a property, you can do with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it is belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. 
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by protocol the handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.4/reference/roles.md b/versioned_docs/version-4.4/reference/roles.md deleted file mode 100644 index 2e3dc570..00000000 --- a/versioned_docs/version-4.4/reference/roles.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Roles ---- - -# Roles - -Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary. - -## Configuring Roles - -Point to a roles configuration file from your application’s `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -You can declare one or more files. Each file should define one or more roles in YAML format. 
- -## Roles File Structure - -A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain: - -- **super_user** – a boolean that grants all permissions. -- **databases** – one or more databases the role has access to. -- **tables** – within each database, table-level and attribute-level permissions. - -**Full Example** - -```yaml -: - super_user: # optional - : - : - read: - insert: - update: - delete: - attributes: - : - read: - insert: - update: -``` - -## Role Flags - -- `super_user: true` — grants full system access. -- `super_user: false` — the role only has the explicit permissions defined in the role. - -## Database and Table Permissions - -Within each role, you may specify one or more databases. Each database can declare permissions for tables. - -Example: - -```yaml -analyst: - super_user: false - data: - Sales: - read: true - insert: false - update: false - delete: false -``` - -In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database. - -## Attribute-Level Permissions - -You can also grant or deny access at the attribute level within a table. - -Example: - -```yaml -editor: - data: - Articles: - read: true - insert: true - update: true - attributes: - title: - read: true - update: true - author: - read: true - update: false -``` - -Here, the `editor` role can update the `title` of an article but cannot update the `author`. - -## Multiple Roles - -Roles can be defined side by side in a single file: - -```yaml -reader: - super_user: false - data: - Dog: - read: true - -writer: - super_user: false - data: - Dog: - insert: true - update: true -``` - -## Behavior on Startup - -- If a declared role does not exist, Harper creates it. -- If a declared role already exists, Harper updates its permissions to match the definition. -- Roles are enforced consistently across deployments, keeping access control in sync with your application code. 
diff --git a/versioned_docs/version-4.4/reference/sql-guide/date-functions.md b/versioned_docs/version-4.4/reference/sql-guide/date-functions.md deleted file mode 100644 index c9747dcd..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. 
- -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. 
- -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.4/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.4/reference/sql-guide/features-matrix.md deleted file mode 100644 index 7766faa4..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## SQL Features Matrix - -Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. 
- -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.4/reference/sql-guide/functions.md b/versioned_docs/version-4.4/reference/sql-guide/functions.md deleted file mode 100644 index 02fff906..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Harper SQL Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. 
Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Functions - -This SQL keywords reference contains the SQL functions available in Harper. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. | -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). 
- -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. | -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. 
If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. | -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. 
| -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. | - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. 
| - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. 
| - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. | -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. 
| - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. | -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. 
| - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. | diff --git a/versioned_docs/version-4.4/reference/sql-guide/index.md b/versioned_docs/version-4.4/reference/sql-guide/index.md deleted file mode 100644 index 52f245ab..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## Harper SQL Guide - -The purpose of this guide is to describe the available functionality of Harper as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. 
Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -Harper adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. - -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -Harper supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -Harper allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.4/reference/sql-guide/json-search.md b/versioned_docs/version-4.4/reference/sql-guide/json-search.md deleted file mode 100644 index 1c0c396b..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -Harper automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. 
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.4/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.4/reference/sql-guide/reserved-word.md deleted file mode 100644 index 2cd812ba..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: Harper SQL Reserved Words ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/versioned_docs/version-4.4/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.4/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index 0c56cf10..00000000 --- a/versioned_docs/version-4.4/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -2. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -3. Note if you are using Postman for you testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. 
| - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. 
- -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. 
- -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. 
- -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain Harper Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. 
| - -### Example - -Find Harper Headquarters within all locations within the database. - -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. 
Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "Harper Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.4/reference/storage-algorithm.md b/versioned_docs/version-4.4/reference/storage-algorithm.md deleted file mode 100644 index d936f1a5..00000000 --- a/versioned_docs/version-4.4/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The Harper storage algorithm is fundamental to the Harper core functionality, enabling the [Dynamic Schema](./dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within Harper. - -## Query Language Agnostic - -The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. 
Each Harper table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. Harper tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](./dynamic-schema) reflexively creates both the attribute and index reflexively as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## Harper Indexing Example (Single Table) - -![](/img/v4.4/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.4/reference/transactions.md b/versioned_docs/version-4.4/reference/transactions.md deleted file mode 100644 index 7e8546fb..00000000 --- a/versioned_docs/version-4.4/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. 
Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in Harper. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. 
However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. - -Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). 
- -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.5/administration/_category_.json b/versioned_docs/version-4.5/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.5/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.5/administration/administration.md b/versioned_docs/version-4.5/administration/administration.md deleted file mode 100644 index 9857b704..00000000 --- a/versioned_docs/version-4.5/administration/administration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -Harper is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own Harper servers. 
- -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database Harper is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](./administration/logging/): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](../developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). - -### Horizontal Scaling with Node Cloning - -Harper provides rapid horizontal scaling capabilities through node cloning functionality. 
- -### Monitoring - -Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](../reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](../developers/operations-api/system-operations). -- Information about the current cluster configuration and status can be found in the [cluster APIs](../developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy visualize and monitor Harper with Graphana. - -### Replication Transaction Logging - -Harper utilizes NATS for replication, which maintains a transaction log. See the [transaction log documentation for information on how to query this log](./administration/logging/). diff --git a/versioned_docs/version-4.5/administration/cloning.md b/versioned_docs/version-4.5/administration/cloning.md deleted file mode 100644 index b3698092..00000000 --- a/versioned_docs/version-4.5/administration/cloning.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of Harper will create a clone of that -instance's config, databases and setup full replication. If it is run in a location where there is no existing Harper install, -it will, along with cloning, install Harper. If it is run in a location where there is another Harper instance, it will -only clone config, databases and replication that do not already exist. 
- -Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of Harper you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. - -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `REPLICATION_HOSTNAME` - _(optional)_ The clones replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--REPLICATION_HOSTNAME` - _(optional)_ The clones clustering host. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. If you want to run clone again set this value to `false`. If Harper is started with the clone variables -still present and `cloned` is true, Harper will just start as normal. 
- -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing `harperdb-config.yaml` file. - -More can be found in the Harper config documentation [here](../deployments/configuration). - -### Excluding database and components - -To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -``` - -_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -### Cloning system database - -Harper uses a database called `system` to store operational information. 
Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. - -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Replication - -If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. - -If any databases are excluded from the clone, replication will not be set up on these databases. - -### JWT Keys - -If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. - -### Cloning overtop of an existing Harper instance - -Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set Harper config before the clone is created. To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install Harper with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. - -### Cloning steps - -Clone node will execute the following steps when ran: - -1. Look for an existing Harper install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. 
If the value exists and is `true` clone will skip the clone logic and start Harper. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install Harper. An instance is classed as a fresh clone if there is no system database. -1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone. -1. Clone is complete, start Harper. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. - -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e REPLICATION_HOSTNAME=1.123.45.7 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. diff --git a/versioned_docs/version-4.5/administration/compact.md b/versioned_docs/version-4.5/administration/compact.md deleted file mode 100644 index 1a71db14..00000000 --- a/versioned_docs/version-4.5/administration/compact.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Compact ---- - -# Compact - -Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. - -There are two options that Harper offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, this is where the following options are useful. 
They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, Harper is not running when performing this operation. - -This will copy a Harper database with compaction. If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database. - -This command should be run in the [CLI](../deployments/harper-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete. Under the hood it loops through all non-system databases, creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database to where the original one is located and remove any backups. 
- -Compact on start is initiated by config in `harperdb-config.yaml` - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/versioned_docs/version-4.5/administration/harper-studio/create-account.md b/versioned_docs/version-4.5/administration/harper-studio/create-account.md deleted file mode 100644 index 2c8a43bc..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [Harper Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -2. Review the Privacy Policy and Terms of Service. -3. Click the sign up for free button. -4. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -5. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. 
diff --git a/versioned_docs/version-4.5/administration/harper-studio/enable-mixed-content.md b/versioned_docs/version-4.5/administration/harper-studio/enable-mixed-content.md deleted file mode 100644 index 67747d71..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the Harper Studio to Harper Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). diff --git a/versioned_docs/version-4.5/administration/harper-studio/index.md b/versioned_docs/version-4.5/administration/harper-studio/index.md deleted file mode 100644 index 75f4ccfb..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Harper Studio ---- - -# Harper Studio - -Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - -Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. It can be enabled in the [configuration file](../../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? 
- -While Harper Studio is web based and hosted by us, all database interactions are performed on the Harper instance the studio is connected to. The Harper Studio loads in your browser, at which point you login to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. All database interactions are made via the Harper Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you. diff --git a/versioned_docs/version-4.5/administration/harper-studio/instance-configuration.md b/versioned_docs/version-4.5/administration/harper-studio/instance-configuration.md deleted file mode 100644 index 394aa21c..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/instance-configuration.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click config in the instance control bar. 
- -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in Harper database user_ - -- Created Date (Harper Cloud only) - -- Region (Harper Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (Harper Cloud only) - -- Disk IOPS (Harper Cloud only) - -## Update Instance RAM - -Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. - -Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. 
- -_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - - If you do have a credit card associated, you will be presented with the updated billing information. - - - Click **Upgrade**. - -2. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -3. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -2. 
The instance will begin deleting immediately. - -## Restart Instance - -The Harper Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -2. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). diff --git a/versioned_docs/version-4.5/administration/harper-studio/instance-metrics.md b/versioned_docs/version-4.5/administration/harper-studio/instance-metrics.md deleted file mode 100644 index e9b48939..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The Harper Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance). 
- -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.5/administration/harper-studio/instances.md b/versioned_docs/version-4.5/administration/harper-studio/instances.md deleted file mode 100644 index 07da8097..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The Harper Studio allows you to administer all of your HarperDinstances in one place. Harper currently offers the following instance types: - -- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/). -- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any Harper installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. Harper Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. 
Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial Harper instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial Harper instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ _More on instance specs\_\_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._ _More on IOPS Impact on Performance\_\_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. 
- -## Register Enterprise Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Instance Password - - _The password of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Host - - _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the Harper instance. Harper defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. 
The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -Harper instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. 
- - _The username of a Harper user that is already configured in your Harper instance._ - -1. Enter the database password. - - _The password of a Harper user that is already configured in your Harper instance._ - -1. Click **Log In**. diff --git a/versioned_docs/version-4.5/administration/harper-studio/login-password-reset.md b/versioned_docs/version-4.5/administration/harper-studio/login-password-reset.md deleted file mode 100644 index 93f9a727..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your Harper Studio Account - -To log into your existing Harper Studio account: - -1. Navigate to the [Harper Studio](https://studio.harperdb.io/). -2. Enter your email address. -3. Enter your password. -4. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the Harper Studio password reset page. -2. Enter your email address. -3. Click **send password reset email**. -4. If the account exists, you will receive an email with a temporary password. -5. Navigate back to the Harper Studio login page. -6. Enter your email address. -7. Enter your temporary password. -8. Click **sign in**. -9. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -10. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the Harper Studio profile page. -2. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -3. Click the **Update Password** button. 
diff --git a/versioned_docs/version-4.5/administration/harper-studio/manage-applications.md b/versioned_docs/version-4.5/administration/harper-studio/manage-applications.md deleted file mode 100644 index 854b94b3..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/manage-applications.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can make edit individual files of your application directly in the Harper Studio. 
- -## Things to Keep in Mind - -To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation. - -When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the Harper logs, which can be found on the [status page](./instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your Harper instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instances is set to by navigating to the [instance config page](./instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. 
In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. - -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. 
Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost/application-template/getAll`. - -## Creating a New Application - -1. From the application page, click the "+ app" button at the top right. -1. Click "+ Create A New Application Using The Default Template". -1. Enter a name for your project, note project names must contain only alphanumeric characters, dashes and underscores. -1. Click OK. -1. Your project will be available in the applications file navigator on the left. Click a file to select a file to edit. - -## Editing an Application - -1. From the applications page, click the file you would like to edit from the file navigator on the left. -1. Edit the file with any changes you'd like. -1. Click "save" at the top right. Note, as mentioned above, when you save a file, the Harper Applications server will be restarted immediately. diff --git a/versioned_docs/version-4.5/administration/harper-studio/manage-databases-browse-data.md b/versioned_docs/version-4.5/administration/harper-studio/manage-databases-browse-data.md deleted file mode 100644 index c9b2844d..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/manage-databases-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Databases / Browse Data ---- - -# Manage Databases / Browse Data - -Manage instance databases/tables and browse data in tabular format with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage databases and tables, add new data, and more. - -## Manage Databases and Tables - -#### Create a Database - -1. 
Click the plus icon at the top right of the databases section. -2. Enter the database name. -3. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -2. Identify the appropriate database to delete and click the red minus sign in the same row. -3. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired database from the databases section. -2. Click the plus icon at the top right of the tables section. -3. Enter the table name. -4. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -5. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -2. Click the minus icon at the top right of the tables section. -3. Identify the appropriate table to delete and click the red minus sign in the same row. -4. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -2. This expands the search filters. -3. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -2. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 2. Click **Import From URL**. - 3. The CSV will load, and you will be redirected back to browse table data. -3. To upload a CSV file: - 1. 
Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 2. Navigate to your desired CSV file and select it. - 3. Click **Insert X Records**, where X is the number of records in your CSV. - 4. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -2. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -3. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -4. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -2. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -3. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -2. Click the **delete icon**. -3. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. 
- -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.5/administration/harper-studio/manage-instance-roles.md b/versioned_docs/version-4.5/administration/harper-studio/manage-instance-roles.md deleted file mode 100644 index 5a1e36e2..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -2. 
Enter the role name. - -3. Click the green check mark. - -4. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -5. Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -6. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -2. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -3. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be remove if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -2. Identify the appropriate role to delete and click the red minus sign in the same row. - -3. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.5/administration/harper-studio/manage-instance-users.md b/versioned_docs/version-4.5/administration/harper-studio/manage-instance-users.md deleted file mode 100644 index e125464a..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: - -1. 
Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -Harper instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -2. Click **Add User**. - -## Edit a User - -Harper instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -2. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 2. Click **Update Password**. - -3. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 2. Click **Update Role**. - -4. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 2. Click **Delete User**. diff --git a/versioned_docs/version-4.5/administration/harper-studio/manage-replication.md b/versioned_docs/version-4.5/administration/harper-studio/manage-replication.md deleted file mode 100644 index a8951b19..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../reference/clustering) first to gain a strong understanding of Harper clustering behavior. 
- -All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -2. Click the appropriate organization that the instance belongs to. - -3. Select your desired instance. - -4. Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -2. Enter Cluster Password. -3. Review and/or Set Cluster Node Name. -4. Click **Enable Clustering**. - -At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you a presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. 
- -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -Harper Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -2. Identify the instance you would like to connect from the **unconnected instances** panel. - -3. Click the plus icon next the appropriate instance. - -4. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -Harper Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -2. Click the minus icon next the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing an channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -2. For publish, click the toggle switch in the **publish** column. - -3. For subscribe, click the toggle switch in the **subscribe** column. 
diff --git a/versioned_docs/version-4.5/administration/harper-studio/organizations.md b/versioned_docs/version-4.5/administration/harper-studio/organizations.md deleted file mode 100644 index faae220e..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique Harper Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the **Create a New Organization** card. -3. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -4. Click Create Organization. 
- -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the Harper Studio Organizations page. -2. Identify the proper organization card and click the trash can icon. -3. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -4. Click the **Do It** button. - -## Manage Users - -Harper Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. In the **add user** box, enter the new user’s email address. -5. Click **Add User**. - -Users may or may not already be Harper Studio users when adding them to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled owner as follows: - -1. Navigate to the Harper Studio Organizations page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Toggle the **Is Owner** switch to the desired status. 
- ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **users** at the top of the screen. -4. Click the appropriate user from the **existing users** section. -5. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -6. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -2. Click the appropriate organization card. -3. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_Harper billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -2. Click **Add Coupon**. -3. The coupon will then be available and displayed in the coupons panel. 
diff --git a/versioned_docs/version-4.5/administration/harper-studio/query-instance-data.md b/versioned_docs/version-4.5/administration/harper-studio/query-instance-data.md deleted file mode 100644 index fa6c1ffe..00000000 --- a/versioned_docs/version-4.5/administration/harper-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the Harper Studio with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -2. Click the appropriate organization that the instance belongs to. -3. Select your desired instance. -4. Click **query** in the instance control bar. -5. Enter your SQL query in the SQL query window. -6. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. 
It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The Harper Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.5/administration/jobs.md b/versioned_docs/version-4.5/administration/jobs.md deleted file mode 100644 index 84859ffd..00000000 --- a/versioned_docs/version-4.5/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -Harper Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/utilities#delete-records-before) - -[export_local](../developers/operations-api/utilities#export-local) - -[export_to_s3](../developers/operations-api/utilities#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. 
- -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. - -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.5/administration/logging/audit-logging.md b/versioned_docs/version-4.5/administration/logging/audit-logging.md deleted file mode 100644 index 209b4981..00000000 --- a/versioned_docs/version-4.5/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging 
- -### Audit log - -The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart Harper for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [Harper API documentation](../../developers/operations-api/logs). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. 
One thing of note is that the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.5/administration/logging/index.md b/versioned_docs/version-4.5/administration/logging/index.md deleted file mode 100644 index bde1870a..00000000 --- a/versioned_docs/version-4.5/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -Harper provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. 
-- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records. diff --git a/versioned_docs/version-4.5/administration/logging/standard-logging.md b/versioned_docs/version-4.5/administration/logging/standard-logging.md deleted file mode 100644 index 044c2260..00000000 --- a/versioned_docs/version-4.5/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the Harper application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. 
Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, increasing the log level down will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of Harper and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. 
To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.5/administration/logging/transaction-logging.md b/versioned_docs/version-4.5/administration/logging/transaction-logging.md deleted file mode 100644 index 353d4f32..00000000 --- a/versioned_docs/version-4.5/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. 
- -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.5/deployments/_category_.json b/versioned_docs/version-4.5/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.5/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.5/deployments/configuration.md b/versioned_docs/version-4.5/deployments/configuration.md deleted file mode 100644 index f7f5f5a7..00000000 --- a/versioned_docs/version-4.5/deployments/configuration.md +++ /dev/null @@ -1,1150 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase, such as `operationsApi`. - -To change a configuration value, edit the `harperdb-config.yaml` file and save any changes. **HarperDB must be restarted for changes to take effect.** - -Alternatively, all configuration values can also be modified using environment variables, command line arguments, or the operations API via the [`set_configuration` operation](../developers/operations-api/utilities#set-configuration). - -For nested configuration elements, use underscores to represent parent-child relationships. When accessed this way, elements are case-insensitive. 
- -For example, to disable logging rotation in the `logging` section: - -```yaml -logging: - rotation: - enabled: false -``` - -You could apply this change using: - -- Environment variable: `LOGGING_ROTATION_ENABLED=false` -- Command line variable: `--LOGGING_ROTATION_ENABLED false` -- Operations API (`set_configuration`): `logging_rotation_enabled: false` - -To change the `port` in the `http` section, use: - -- Environment variable: `HTTP_PORT=` -- Command line variable: `--HTTP_PORT ` -- Operations API (`set_configuration`): `http_port: ` - -To set the `operationsApi.network.port` to `9925`, use: - -- Environment variable: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variable: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Operations API (`set_configuration`): `operationsApi_network_port: 9925` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install Harper overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. 
- -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization' - -A string representation of a comma separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests. 
- -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. - -`requestQueueLimit` - _Type_: integer; _Default_: 20000 - -The maximum estimated request queue time, in milliseconds. When the queue is above this limit, requests will be rejected with a 503. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key. - -`http2` - _Type_: boolean; _Default_: false - -Enables HTTP/2 for the HTTP server. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mlts` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. 
- -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -```yaml -http: - mtls: true -``` - -or - -```yaml -http: - mtls: - required: true - user: user-name -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. 
If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. `debug.host` - Specify the host interface to listen on `debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - ---- - -### `replication` - -The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances. - -```yaml -replication: - hostname: server-one - url: wss://server-one:9925 - databases: '*' - routes: - - wss://server-two:9925 - port: null - securePort: 9933, - enableRootCAs: true -``` - -`hostname` - _Type_: string; - -The hostname of the current Harper instance. - -`url` - _Type_: string; - -The URL of the current Harper instance. - -`databases` - _Type_: string/array; _Default_: "\*" (all databases) - -Configure which databases to replicate. This can be a string for all database or an array for specific databases. The list can be a simple array of database names: - -```yaml -replication: - databases: - - system - - data - - mydb -``` - -The database list can also specify databases that are purely "sharded" databases. For databases that are marked as sharded, replication will _only_ create database subscription connections to nodes in the same shard. Sharding can still function without this setting, since the residency location for sharding can be determined for each table or each record. 
However, using this setting will reduce the overhead of connections in situations where all data is uniformly sharded, creating a simpler and more efficient replication topology. To mark databases as sharded, you can specify a list of databases with a `name` and `sharded` flag: - -```yaml -replication: - databases: - - name: system - - name: data - sharded: true -``` - -`routes` - _Type_: array; - -An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties. - -`startTime` - _Type_: string; ISO formatted UTC date string. - -Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property. - -`revokedCertificates` - _Type_: array; - -An array of serial numbers of revoked certificates. If a connection is attempted with a certificate that is in this list, the connection will be rejected. - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 - startTime: 2024-02-06T15:30:00Z - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -`port` - _Type_: integer; - -The port to use for replication connections. - -`securePort` - _Type_: integer; _Default_: 9933 - -The port to use for secure replication connections. - -`enableRootCAs` - _Type_: boolean; _Default_: true - -When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time. - -`blobTimeout` - _Type_: number; _Default_: 120000 - -Amount of time to wait for a blob to be transferred before timing out, measured in milliseconds. - -`failOver` - _Type_: boolean; _Default_: true - -When true, Harper will attempt to fail-over to subscribing to a different node if the current node is unreachable, to reach consistency. 
- -`shard` - _Type_: integer; - -This defines the shard id of this instance and is used in conjunction with the [Table Resource functions](../developers/replication/sharding#custom-sharding) `setResidency` & `setResidencyById` to programmatically route traffic to the proper shard. - ---- - -### `clustering` using NATS - -The `clustering` section configures the NATS clustering engine, this is used to replicate data between instances of Harper. - -_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over Websockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._ - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the Harper mesh network and discovery service. - -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. 
This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. - -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. 
- -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. - -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of Harper threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. 
Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this is to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. 
- -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special Harper user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enabled the local studio or not. - -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Logging of application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root`). - -In addition, structured logging of data changes are also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enabled table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. 
It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: warn - -Control the verbosity of text event logs. - -```yaml -logging: - level: warn -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`. - -`console` - _Type_: boolean; _Default_: true - -Controls whether console.log and other console.\* calls (as well as another JS components that writes to `process.stdout` and `process.stderr`) are logged to the log file. By default, these are logged to the log file, but this can be disabled. - -```yaml -logging: - console: true -``` - -`root` - _Type_: string; _Default_: \/log - -The path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: true - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. 
- -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log Harper logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - -`auditAuthEvents` - -`logFailed` - _Type_: boolean; _Default_: false - -Log all failed authentication events. - -_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -`logSuccessful` - _Type_: boolean; _Default_: false - -Log all successful authentication events. - -_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -```yaml -logging: - auditAuthEvents: - logFailed: false - logSuccessful: false -``` - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in Harper. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. 
- -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the Harper Operations API.\ -All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. 
- -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the Harper operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. 
- -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. - -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: number; _Default_: null - -Path to a compression dictionary file - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. 
- -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting Harper, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/database` - -The `path` configuration sets where all database files should reside. - -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. 
- -`blobPaths` - _Type_: string; _Default_: `/blobs` - -The `blobPaths` configuration sets where all the blob files should reside. This can be an array of paths, and if there are multiple, the blobs will be distributed across the paths. - -```yaml -storage: - blobPaths: - - /users/harperdb/big-storage -``` - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - -`reclamation` - -The reclamation section provides configuration for the reclamation process, which is responsible for reclaiming space when free space is low. For example: - -```yaml -storage: - reclamation: - threshold: 0.4 # Start storage reclamation efforts when free space has reached 40% of the volume space (default) - interval: 1h # Reclamation will run every hour (default) - evictionFactor: 100000 # A factor used to determine how much aggressively to evict cached entries (default) -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. 
- -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. - -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. - -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). 
Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). - -`mlts` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority` - -This can define a specific path to use for the certificate authority. 
By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this. - -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. 
- -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../developers/components/managing#adding-components-to-root) package. This could be a remote git repo, a local folder/file or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/versioned_docs/version-4.5/deployments/harper-cli.md b/versioned_docs/version-4.5/deployments/harper-cli.md deleted file mode 100644 index e559df01..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cli.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Harper CLI ---- - -# Harper CLI - -## Harper CLI - -The Harper command line interface (CLI) is used to administer [self-installed Harper instances](./install-harper/). 
- -### Installing Harper - -To install Harper with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. - -**Environment Variables** - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -**Command Line Arguments** - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -### Starting Harper - -To start Harper after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -### Stopping Harper - -To stop Harper once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -### Restarting Harper - -To restart Harper once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -### Getting the Harper Version - -To check the version of Harper that is installed run the following command: - -```bash -harperdb version -``` - ---- - -### Renew self-signed certificates - -To renew the Harper generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -### Copy a database with compaction - -To copy a Harper database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - ---- - -### Get 
all available CLI commands - -To display all available Harper CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -### Get the status of Harper and clustering - -To display the status of the Harper process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - ---- - -### Backups - -Harper uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down) , and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -## Operations API through the CLI - -Some of the API operations are available through the CLI, this includes most operations that do not require nested parameters. To call the operation use the following convention: ` =`. By default, the result will be formatted as YAML, if you would like the result in JSON pass: `json=true`. 
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` - -### Remote Operations - -The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example: - -```bash -export CLI_TARGET_USERNAME=HDB_ADMIN -export CLI_TARGET_PASSWORD=password -harperdb describe_database database=dev target=https://server.com:9925 -``` - -The same set of operations API are available for remote operations as well. - -#### Remote Component Deployment - -When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. 
This will package the current directory and send it to the target server (also `deploy` is allowed as an alias to `deploy_component`): - -```bash -harperdb deploy target=https://server.com:9925 -``` - -If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter): - -```bash -harperdb restart target=https://server.com:9925 replicated=true -``` diff --git a/versioned_docs/version-4.5/deployments/harper-cloud/alarms.md b/versioned_docs/version-4.5/deployments/harper-cloud/alarms.md deleted file mode 100644 index 372807e5..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | --------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.5/deployments/harper-cloud/index.md b/versioned_docs/version-4.5/deployments/harper-cloud/index.md deleted file mode 100644 index c0785d0d..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Harper Cloud ---- - -# Harper Cloud - -[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper, it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new Harper Cloud instance in the Harper Studio. diff --git a/versioned_docs/version-4.5/deployments/harper-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.5/deployments/harper-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 6ea4c7d2..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.5/deployments/harper-cloud/iops-impact.md b/versioned_docs/version-4.5/deployments/harper-cloud/iops-impact.md deleted file mode 100644 index 18e9f948..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## Harper Cloud Storage - -Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all Harper Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for Harper Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. 
So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact Harper Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. 
Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to Harper’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.5/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.5/deployments/harper-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index b6104f7c..00000000 --- a/versioned_docs/version-4.5/deployments/harper-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your Harper instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -Harper on Verizon 5G Wavelength brings Harper closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from Harper to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -Harper 5G Wavelength Instance Specs While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## Harper 5G Wavelength Storage - -Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](./iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.5/deployments/install-harper/index.md b/versioned_docs/version-4.5/deployments/install-harper/index.md deleted file mode 100644 index d7ea3cf6..00000000 --- a/versioned_docs/version-4.5/deployments/install-harper/index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Install Harper ---- - -# Install Harper - -## Install Harper - -This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can deploy it to [Harper Fabric](https://fabric.harper.fast) our distributed data application platform service. Harper is a cross-platform database; we recommend Linux for production use. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. Harper can also run on Windows and Mac, for development purposes only. Note: For Windows, we strongly recommend the use of Windows Subsystem for Linux (WSL). - -Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you have installed, you can skip to installing Harper, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always uses latest minor/patch for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start Harper - -Then you can install Harper with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -Harper will automatically start after installation. 
Harper's installation can be configured with numerous options via CLI arguments, for more information visit the [Harper Command Line Interface](./harper-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux). - -## With Docker - -If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container. - -## Offline Install - -If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -## Installation on Less Common Platforms - -Harper comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.5/deployments/install-harper/linux.md b/versioned_docs/version-4.5/deployments/install-harper/linux.md deleted file mode 100644 index cc312bac..00000000 --- a/versioned_docs/version-4.5/deployments/install-harper/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to Harper with sudo privileges exists -1. An additional volume for storing Harper files is attached to the Linux instance -1. Traffic to ports 9925 (Harper Operations API) 9926 (Harper Application Interface) and 9932 (Harper Clustering) is permitted - -While you will need to access Harper through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. 
- ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start Harper - -Here is an example of installing Harper with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing Harper with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start Harper on Boot - -Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=Harper - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration). 
diff --git a/versioned_docs/version-4.5/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.5/deployments/upgrade-hdb-instance.md deleted file mode 100644 index 768b9323..00000000 --- a/versioned_docs/version-4.5/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Upgrade a Harper Instance ---- - -# Upgrade a Harper Instance - -This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](./harper-cloud/) will be upgraded by the Harper Cloud team. - -## Upgrading - -Upgrading Harper is a two-step process. First the latest version of Harper must be downloaded from npm, then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of Harper using `npm install -g harperdb`. - - Note `-g` should only be used if you installed Harper globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - Harper will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -Harper supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that Harper is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure Harper is not running: - -```bash -harperdb stop -``` - -Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install Harper globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start Harper - -```bash -harperdb start -``` - ---- - -## Upgrading Nats to Plexus 4.4 - -To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process. - -The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa. 
- -### Enabling Plexus - -To enable Plexus on a node that is already running NATS, you will need to update [two values](./configuration) in the `harperdb-config.yaml` file: - -```yaml -replication: - url: wss://my-cluster-node-1:9925 - hostname: node-1 -``` - -`replication.url` – This should be set to the URL of the current Harper instance. - -`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance. - -### Upgrade Steps - -1. Set up the bridge node: - - Choose one node to be the bridge node. - - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.** - - Stop the instance and perform the upgrade. - - Start the instance. This node should now be running both Plexus and NATS. -1. Upgrade a node: - - Choose a node that needs upgrading and enable Plexus by following the "Enable Plexus" steps. - - Disable NATS by setting `clustering.enabled` to `false`. - - Stop the instance and upgrade it. - - Start the instance. - - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node. _Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._ - -```json -{ - "operation": "add_node", - "hostname:": "node-1", - "url": "wss://my-cluster-node-1:9925" -} -``` - -1. Repeat Step 2 on all remaining nodes that need to be upgraded. -1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance. - -Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes. 
diff --git a/versioned_docs/version-4.5/developers/_category_.json b/versioned_docs/version-4.5/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.5/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.5/developers/applications/caching.md b/versioned_docs/version-4.5/developers/applications/caching.md deleted file mode 100644 index 317f3f6b..00000000 --- a/versioned_docs/version-4.5/developers/applications/caching.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: Caching ---- - -# Caching - -Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). 
You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](./defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). -- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). 
- -#### How `scanInterval` Determines the Eviction Cycle - -`scanInterval` determines fixed clock-aligned times when eviction runs, and these times are the same regardless of when the server started. Harper takes the `scanInterval` and divides the TTL (`expiration` + `eviction`) into evenly spaced “anchor times.” These anchors are calculated in the local timezone of the server. This allows Harper to “snap” the eviction schedule to predictable points on the clock, such as every 15 minutes or every 6 hours, based on the interval length. As a result: - -- The server’s startup time does not affect when eviction runs. -- Eviction timings are deterministic and timezone-aware. -- For any given configuration, the eviction schedule is the same across restarts and across servers in the same local timezone. - -#### Example: 1-Hour Expiration - -`expiration` = 1 hour with default `scanInterval` (15 minutes, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 00:15 -> 00:30 -> 00:45 -> 01:00 -> ... continuing every 15 minutes ... - -If the server starts at 12:05 it does not run eviction at 12:20 or “15 minutes after startup.” Instead, the next scheduled anchor is 12:15, then 12:30, 12:45, 13:00, etc. The schedule is clock-aligned, not startup-aligned. - -#### Example: 1-Day Expiration - -`expiration` = 1 day with default `scanInterval` (6 hours, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 06:00 -> 12:00 -> 18:00 -> ... continuing every 6 hours ... - -If the server starts at 12:05 the next matching eviction time is 18:00 the same day, then 00:00, 06:00, 12:00, 18:00, etc. If the server starts at 19:30 the schedule does not shift. Instead, the next anchor time is 00:00, and the regular 6-hour cycle continues. - -## Define External Data Source - -Next, you need to define the source for your cache. 
External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyCache } = tables; -MyCache.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. 
Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. 
This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). 
- -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - setInterval(() => { // every second retrieve more data - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - }, 1000); - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running on a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](./caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response.
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - get() { - const post = await (await fetch(`https://my-blog-server/${this.getId()}`).json()); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note, that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. 
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache. - - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this is overridden, the method will be called at this time.
- -### Caching Flow with Write-Through - -When writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- Harper will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written to the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/versioned_docs/version-4.5/developers/applications/debugging.md b/versioned_docs/version-4.5/developers/applications/debugging.md deleted file mode 100644 index bd9d2622..00000000 --- a/versioned_docs/version-4.5/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm.
Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. - -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration). - -Harper Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. 
- -## Viewing the Log - -The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page. Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log. diff --git a/versioned_docs/version-4.5/developers/applications/define-routes.md b/versioned_docs/version-4.5/developers/applications/define-routes.md deleted file mode 100644 index c442f9f1..00000000 --- a/versioned_docs/version-4.5/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. - -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. 
- -In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: (request) => { - request.body= { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. 
- -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](./define-routes#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](./req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. 
For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.5/developers/applications/defining-roles.md b/versioned_docs/version-4.5/developers/applications/defining-roles.md deleted file mode 100644 index 365aa132..00000000 --- a/versioned_docs/version-4.5/developers/applications/defining-roles.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Defining Application Roles ---- - -# Defining Application Roles - -Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase. - -Let’s walk through creating a role, assigning it, and seeing it in action. - -## Step 1: Declare a Role - -First, point Harper to a roles configuration file. Add this to your `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -Then create a simple `roles.yaml` in your application directory. 
For example, here’s a role that can only read and insert data into the `Dog` table: - -```yaml -dog-reader: - super_user: false - data: - Dog: - read: true - insert: true -``` - -When Harper starts up, it will create this role (or update it if it already exists). - -## Step 2: Create a User for the Role - -Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run): - -```bash -curl -u admin:password -X POST http://localhost:9926 \ - -H "Content-Type: application/json" \ - -d '{ - "operation": "add_user", - "username": "alice", - "password": "password", - "role": "dog_reader" - }' -``` - -Now you have a user named `alice` with the `dog_reader` role. - -## Step 3: Make Requests as Different Users - -Authenticate requests as `alice` to see how her role works: - -```bash -# allowed (insert, role permits insert) -curl -u alice:password -X POST http://localhost:9926/Dog/ \ - -H "Content-Type: application/json" \ - -d '{"name": "Buddy", "breed": "Husky"}' - -# not allowed (delete, role does not permit delete) -curl -u alice:password -X DELETE http://localhost:9926/Dog/1 -``` - -The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`. - -Now compare with a super_user: - -```bash -# super_user can delete -curl -u admin:password -X DELETE http://localhost:9926/Dog/1 -``` - -This succeeds because the super_user role has full permissions. - -## Where to Go Next - -This page gave you the basics - declare a role, assign it, and see it work. - -For more advanced scenarios, including: - -- defining multiple databases per role, -- granting fine-grained attribute-level permissions, -- and the complete structure of `roles.yaml`, - -see the [Roles Reference](../../reference/roles). 
diff --git a/versioned_docs/version-4.5/developers/applications/defining-schemas.md b/versioned_docs/version-4.5/developers/applications/defining-schemas.md deleted file mode 100644 index cc8c4c75..00000000 --- a/versioned_docs/version-4.5/developers/applications/defining-schemas.md +++ /dev/null @@ -1,222 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in Harper's using GraphQL schema definitions. Schemas definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. The [introduction to applications provides](./) a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allows data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties. For example, some Dog records could also optionally include a `favoriteTrick` property. 
- -In this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. -- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -Database naming: the default "data" database is generally a good default choice for tables in applications that will not be reused in other applications (and don't need to worry about staying in a separate namespace). Application with many tables may wish to organize the tables into separate databases (but remember that transactions do not preserve atomicity across different databases, only across tables in the same database). For components that are designed for re-use, it is recommended that you use a database name that is specific to the component (e.g. "my-component-data") to avoid name collisions with other components. 
- -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoints, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key. Using the `@relationship` directive will define a property as a computed property, which resolves to the an record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship. 
Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resource) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... -} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define on a reciprocal relationship, from the example above, adding a relationship from brand back to product. 
Here we continue to use
- totalPrice: Float @computed -} -``` - -```javascript -tables.Product.setComputedAttribute('totalPrice', (record) => { - return record.price + record.price * record.taxRate; -}); -``` - -Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed(version: 1) @indexed -} -``` - -If you were to update the `setComputedAttribute` function for the `totalPrice` attribute, to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing. - -Note that computed properties will not be included by default in a query result, you must explicitly include them in query results using the `select` query function. 
- -Another example of using a computed custom index, is that we could index all the comma-separated words in a `tags` property by doing (similar techniques are used for full-text indexing): - -```graphql -type Product @table { - id: ID @primaryKey - tags: String # comma delimited set of tags - tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags -} -``` - -For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32-bit, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, that you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys). - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). 
- -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself. - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity. - -### Field Types - -Harper supports the following field types in addition to user defined (object) types: - -- `String`: String/text -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647) -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992) -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available) -- `BigInt`: Any integer (negative or positive) with less than 300 digits (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately) -- `Boolean`: true or false -- `ID`: A string (but indicates it is not intended to be human readable) -- `Any`: Any primitive, object, or array is allowed -- `Date`: A Date object -- `Bytes`: Binary data as a Buffer or Uint8Array -- `Blob`: Binary data as a [Blob](../../reference/blob), designed for large blocks of data that can be streamed. It is recommend that you use this for binary data that will typically be larger than 20KB. 
- -#### Renaming Tables - -It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. - -### OpenAPI Specification - -_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints are configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`. - -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/versioned_docs/version-4.5/developers/applications/index.md b/versioned_docs/version-4.5/developers/applications/index.md deleted file mode 100644 index 02ab0974..00000000 --- a/versioned_docs/version-4.5/developers/applications/index.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -title: Applications ---- - -# Applications - -## Overview of Harper Applications - -Harper is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that Harper provides by building a Harper component, a fundamental building-block of the Harper ecosystem. - -When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference. - -## Understanding the Component Application Architecture - -Harper provides several types of components. 
Any package that is added to Harper is called a "component", and components are generally categorized as either "applications", which deliver a set of endpoints for users, or "extensions", which are building blocks for features like authentication, additional protocols, and connectors that can be used by other components. Components can be added to the `hdb/components` directory and will be loaded by Harper when it starts. Components that are remotely deployed to Harper (through the studio or the operation API) are installed into the `hdb/node_modules` directory. Using `harperdb run .` or `harperdb dev .` allows us to specifically load a certain application in addition to any that have been manually added to `hdb/components` or installed (in `hdb/node_modules`). - -```mermaid -flowchart LR - Client(Client)-->Endpoints - Client(Client)-->HTTP - Client(Client)-->Extensions - subgraph Harper - direction TB - Applications(Applications)-- "Schemas" --> Tables[(Tables)] - Applications-->Endpoints[/Custom Endpoints/] - Applications-->Extensions - Endpoints-->Tables - HTTP[/REST/HTTP/]-->Tables - Extensions[/Extensions/]-->Tables - end -``` - -## Custom Functionality with JavaScript - -[The getting started guide](/learn/) covers how to build an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. In Harper, data is accessed through our [Resource API](../../reference/resources/), a standard interface to access data sources, tables, and make them available to endpoints. 
Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - get(query) { - this.humanAge = 15 + this.age * 5; // silly calculation of human age equivalent - return super.get(query); - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age`, will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. 
And changing or assigning new properties can be saved or included in the resource as it returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -And next we will use this table in our `get()` method. We will call the new table's (static) `get()` method to retrieve a breed by id. To do this correctly, we access the table using our current context by passing in `this` as the second argument. This is important because it ensures that we are accessing the data atomically, in a consistent snapshot across tables. This provides automatically tracking of most recently updated timestamps across resources for caching purposes. This allows for sharing of contextual metadata (like user who requested the data), and ensure transactional atomicity for any writes (not needed in this get operation, but important for other operations). The resource methods are automatically wrapped with a transaction (will commit/finish when the method completes), and this allows us to fully utilize multiple resources in our current transaction. 
With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -//resource.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - async get(query) { - let breedDescription = await Breed.get(this.breed, this); - this.breedDescription = breedDescription; - return super.get(query); - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. the POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - async post(data) { - if (data.action === 'add-trick') this.tricks.push(data.trick); - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). So when you push data on to the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being update. 
However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post` method or `put` method to do this, but we may want to separate the logic so these methods can be called separately without authorization checks. The [Resource API](../../reference/resources/) defines `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete`, or to easily configure individual capabilities. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - allowUpdate(user) { - return this.owner === user.username; - } -} -``` - -Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.json -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to url like // to be directly resolved to this resource. - -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. 
We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get() { - return (await fetch(`https://best-dog-site.com/${this.getId()}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources. - -Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../../reference/globals) and the [Resource interface](../../reference/resources/). - -## Configuring Applications/Components - -Every application or component can define their own configuration in a `config.yaml`. If you are using the application template, you will have a [default configuration in this config file](https://github.com/HarperDB/application-template/blob/main/config.yaml) (which is default configuration if no config file is provided). Within the config file, you can configure how different files and resources are loaded and handled. The default configuration file itself is documented with directions. Each entry can specify any `files` that the loader will handle, and can also optionally specify what, if any, URL `path`s it will handle. A path of `/` means that the root URLs are handled by the loader, and a path of `.` indicates that the URLs that start with this application's name are handled. - -This config file allows you define a location for static files, as well (that are directly delivered as-is for incoming HTTP requests). 
- -Each configuration entry can have the following properties, in addition to properties that may be specific to the individual component: - -- `files`: This specifies the set of files that should be handled by the component. This is a glob pattern, so a set of files can be specified like "directory/\*\*". -- `path`: This is the URL path that is handled by this component. -- `root`: This specifies the root directory for mapping file paths to the URLs. For example, if you want all the files in `web/**` to be available in the root URL path via the static handler, you could specify a root of `web`, to indicate that the web directory maps to the root URL path. -- `package`: This is used to specify that this component is a third party package, and can be loaded from the specified package reference (which can be an NPM package, Github reference, URL, etc.). - -## Define Fastify Routes - -Exporting resources will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally Harper's REST interface is recommended for optimum performance and ease of use. 
- -## Restarting Your Instance - -Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.5/developers/applications/web-applications.md b/versioned_docs/version-4.5/developers/applications/web-applications.md deleted file mode 100644 index 02fd1893..00000000 --- a/versioned_docs/version-4.5/developers/applications/web-applications.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Web Applications on Harper ---- - -# Web Applications on Harper - -Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed -specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used -with Harper to create web applications with standard best-practice design and development patterns. Running these frameworks -on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading, -caching, and distributed design. - -Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching -allows you to create full-featured web applications with a single platform. This eliminates the overhead of legacy solutions that -require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while -allowing the full stack to deployed to distributed locations with full local response handling, providing an incredibly low latency web experience. 
- -## Web Application Frameworks - -With built-in caching mechanisms, and an easy-to-use JavaScript API for interacting with data, creating full-featured applications -using popular frameworks is a simple and straightforward process. - -Get started today with one of our examples: - -- [Next.js](https://github.com/HarperDB/nextjs-example) -- [React SSR](https://github.com/HarperDB/react-ssr-example) -- [Vue SSR](https://github.com/HarperDB/vue-ssr-example) -- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example) -- [Solid SSR](https://github.com/HarperDB/solid-ssr-example) - -## Cookie Support - -Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications -using best-practice security patterns, allowing users to login and maintain a session without any credential storage on the client side -that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. For example, this could be a login endpoint in your resources.js file: - -```javascript -export class Login extends Resource { - async post(data) { - const { username, password } = data; - await request.login(username, password); - return { message: 'Logged in!' }; - } -} -``` - -This endpoint can be called from the client side using a standard fetch request, a cookie will be returned, and the session will be maintained by Harper. -This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling. - -## Browser Caching Negotiation - -Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return `304 Not Modified` response based on prior `Etag` sent in headers. 
It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server. - -## Built-in Cross-Origin Resource Sharing (CORS) - -Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you. - -## More Resources - -Make sure to check out our developer videos too: - -- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY) -- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc) diff --git a/versioned_docs/version-4.5/developers/components/built-in.md b/versioned_docs/version-4.5/developers/components/built-in.md deleted file mode 100644 index b7f4498e..00000000 --- a/versioned_docs/version-4.5/developers/components/built-in.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Built-In Components ---- - -# Built-In Components - -Harper provides extended features using built-in components. They do **not** need to be installed with a package manager, and simply must be specified in a config to run. These are used throughout many Harper docs, guides, and examples. 
Unlike external components which have their own semantic versions, built-in components follow Harper's semantic version. - -- [Built-In Components](#built-in-components) - - [fastifyRoutes](#fastifyroutes) - - [graphql](#graphql) - - [graphqlSchema](#graphqlschema) - - [jsResource](#jsresource) - - [loadEnv](#loadenv) - - [rest](#rest) - - [roles](#roles) - - [static](#static) - - - - - -## fastifyRoutes - -Specify custom endpoints using [Fastify](https://fastify.dev/). - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Define Fastify Routes](../applications/define-routes) - -```yaml -fastifyRoutes: - files: './routes/*.js' -``` - -## graphql - -> GraphQL querying is **experimental**, and only partially implements the GraphQL Over HTTP / GraphQL specifications. - -Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification. - -Complete documentation for this feature is available here: [GraphQL](../../reference/graphql) - -```yaml -graphql: true -``` - -## graphqlSchema - -Specify schemas for Harper tables and resources via GraphQL schema syntax. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Schemas](../applications/defining-schemas) - -```yaml -graphqlSchema: - files: './schemas.graphql' -``` - -## jsResource - -Specify custom, JavaScript based Harper resources. 
- -Refer to the Application [Custom Functionality with JavaScript](../applications/#custom-functionality-with-javascript) guide, or [Resource Class](../../reference/resource) reference documentation for more information on custom resources. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -```yaml -jsResource: - files: './resource.js' -``` - -## loadEnv - -Load environment variables via files like `.env`. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Ensure this component is specified first in `config.yaml` so that environment variables are loaded prior to loading any other components. - -```yaml -loadEnv: - files: '.env' -``` - -This component matches the default behavior of dotenv where existing variables take precedence. Specify the `override` option in order to override existing environment variables assigned to `process.env`: - -```yaml -loadEnv: - files: '.env' - override: true -``` - -> Important: Harper is a single process application. Environment variables are loaded onto `process.env` and will be shared throughout all Harper components. This means environment variables loaded by one component will be available on other components (as long as the components are loaded in the correct order). - - - - - - - - - -## rest - -Enable automatic REST endpoint generation for exported resources with this component. 
- -Complete documentation for this feature is available here: [REST](../rest) - -```yaml -rest: true -``` - -This component contains additional options: - -To enable `Last-Modified` header support: - -```yaml -rest: - lastModified: true -``` - -To disable automatic WebSocket support: - -```yaml -rest: - webSocket: false -``` - -## roles - -Specify roles for Harper tables and resources. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Roles](../applications/defining-roles) - -```yaml -roles: - files: './roles.yaml' -``` - -## static - -Specify which files to serve statically from the Harper HTTP endpoint. Built using the [send](https://www.npmjs.com/package/send) and [serve-static](https://www.npmjs.com/package/serve-static) modules. - -This component is a [Resource Extension](./reference#resource-extension) and can be configured with the [`files`, `path`, and `root`](./reference#resource-extension-configuration) configuration options. - -```yaml -static: - files: './web/*' -``` diff --git a/versioned_docs/version-4.5/developers/components/index.md b/versioned_docs/version-4.5/developers/components/index.md deleted file mode 100644 index 9a5609dc..00000000 --- a/versioned_docs/version-4.5/developers/components/index.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Components ---- - -# Components - -Harper components are a core Harper concept defined as flexible JavaScript based _extensions_ of the highly extensible core Harper platform. They are executed by Harper directly and have complete access to the Harper [Global APIs](../../reference/globals) (such as `Resource`, `databases`, and `tables`). - -A key aspect of components is their extensibility; components can be built on other components. 
For example, a [Harper Application](../../developers/applications) is a component that uses many other components. The [application template](https://github.com/HarperDB/application-template) demonstrates many of Harper's built-in components such as `rest` (for automatic REST endpoint generation), `graphqlSchema` (for table schema definitions), and many more. - -From management to development, the following pages document everything a developer needs to know about Harper components. - -- [Managing Components](components/managing) - developing, installing, deploying, and executing Harper components locally and remotely -- [Technical Reference](components/reference) - detailed, technical reference for component development -- [Built-In Components](components/built-in) - documentation for all of Harper's built-in components (i.e. `rest`) - -## Custom Components - -The following list is all of Harper's officially maintained, custom components. They are all available on npm and GitHub. - -- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) -- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) -- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) -- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) -- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) -- [`@harperdb/astro`](https://github.com/HarperDB/astro) diff --git a/versioned_docs/version-4.5/developers/components/managing.md b/versioned_docs/version-4.5/developers/components/managing.md deleted file mode 100644 index 97402e39..00000000 --- a/versioned_docs/version-4.5/developers/components/managing.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Managing ---- - -# Managing - -Harper offers several approaches to managing components that differ between local development and Harper managed instances. This page will cover the recommended methods of developing, installing, deploying, and running Harper components. 
- -## Local Development - -Harper is designed to be simple to run locally. Generally, Harper should be installed locally on a machine using a global package manager install (i.e. `npm i -g harperdb`). - -> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/). - -When developing a component locally there are a number of ways to run it on Harper. - -### `dev` and `run` commands - -The quickest way to run a component is by using the `dev` command within the component directory. - -The `harperdb dev .` command will automatically watch for file changes within the component directory and restart the Harper threads when changes are detected. - -The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread. - -Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process. - -### Deploying to a local Harper instance - -Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead. - -1. Start up Harper with `harperdb` -1. _Deploy_ the component to the local instance by executing: - - ```sh - harperdb deploy_component \ - project= \ - package= \ - restart=true - ``` - - - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally - - The `package=` option creates a symlink to the component simplifying restarts - - By default, the `deploy_component` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly - - The `restart=true` option automatically restarts Harper threads after the component is deployed - - If set to `'rolling'`, a rolling restart will be triggered after the component is deployed - -1. 
In another terminal, use the `harperdb restart` command to restart the instance's threads at any time - - With `package=`, the component source is symlinked so changes will automatically be picked up between restarts - - If `package` was omitted, run the `deploy_component` command again with any new changes -1. To remove the component use `harperdb drop_component project=` - -Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the component deployed). Upon Harper startup, the component will automatically be loaded and executed across all threads. - -> Not all [component operations](../operations-api/components) are available via CLI. When in doubt, switch to using the Operations API via network requests to the local Harper instance. - -For example, to properly _deploy_ a `test-component` locally, the command would look like: - -```sh -harperdb deploy_component \ - project=test-component \ - package=/Users/dev/test-component \ - restart=true -``` - -> If the current directory is the component directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path. - -## Remote Management - -Managing components on a remote Harper instance is best accomplished through [component operations](../operations-api/components), similar to using the `deploy_component` command locally. Before continuing, always backup critical Harper instances. Managing, deploying, and executing components can directly impact a live system. - -Remote Harper instances work very similarly to local Harper instances. The primary component management operations still include `deploy_component`, `drop_component`, and `restart`. - -The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. 
Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments. - -All together: - -```sh -harperdb deploy_component \ - project= \ - package= \ - username= \ - password= \ - target= \ - restart=true \ - replicated=true -``` - -Or, using environment variables: - -```sh -export CLI_TARGET_USERNAME= -export CLI_TARGET_PASSWORD= -harperdb deploy_component \ - project= \ - package= \ - target= \ - restart=true \ - replicated=true -``` - -Unlike local development where `package` should be set to a local file path for symlinking and improved development experience purposes, now it has some additional options. - -A local component can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation. - -Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies). - -- For components deployed to npm, specify the package name: `package="@harperdb/status-check"` -- For components on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check` -- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-component.git"` - - Reference the [SSH Key](../operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance -- Even tarball URLs are supported: `package="https://example.com/component.tar.gz"` - -> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers. 
- -These `package` values are all supported because behind-the-scenes, Harper is generating a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. This is why symlinks are generated when specifying a file path locally. The following [Advanced](./managing#advanced) section explores this pattern in more detail. - -Finally, don't forget to include `restart=true`, or run `harperdb restart target=`. - -## Advanced - -The following methods are advanced and should be executed with caution as they can have unintended side-effects. Always backup any critical Harper instances before continuing. - -First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field. - -> For a useful shortcut on POSIX compliant machines run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"//g'` - -This path is the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path will be `/components` by default (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path. - -### Adding components to root - -Similar to how components can specify other components within their `config.yaml`, components can be added to Harper by adding them to the `harperdb-config.yaml`. - -The configuration is very similar to that of `config.yaml`. Entries are comprised of a top-level `:`, and an indented `package: ` field. Any additional component options can also be included as indented fields. - -```yaml -status-check: - package: '@harperdb/status-check' -``` - -The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. 
When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. Thus, the `package: ` can be any valid dependency syntax such as npm packages, GitHub repos, tarballs, and local directories are all supported. - -Given a root config like: - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from npm -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -Harper will generate a `package.json` like: - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -npm will install all the components and store them in ``. A symlink back to `/node_modules` is also created for dependency resolution purposes. - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -By specifying a file path, npm will generate a symlink and then changes will be automatically picked up between restarts. diff --git a/versioned_docs/version-4.5/developers/components/reference.md b/versioned_docs/version-4.5/developers/components/reference.md deleted file mode 100644 index 525ffa6a..00000000 --- a/versioned_docs/version-4.5/developers/components/reference.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: Component Reference ---- - -# Component Reference - -The technical definition of a Harper component is fairly loose. 
In the absolute, simplest form, a component is any JavaScript module that is compatible with the [default component configuration](#default-component-configuration). For example, a module with a singular `resources.js` file is technically a valid component. - -Harper provides many features as _built-in components_, these can be used directly without installing any other dependencies. - -Other features are provided by _custom components_. These can be npm packages such as [@harperdb/nextjs](https://github.com/HarperDB/nextjs) and [@harperdb/apollo](https://github.com/HarperDB/apollo) (which are maintained by Harper), or something maintained by the community. Custom components follow the same configuration rules and use the same APIs that Harper's built-in components do. The only difference is that they must be apart of the component's dependencies. - -> Documentation is available for all [built-in](./built-in) and [custom](./#custom-components) Harper components. - - - -## Component Configuration - -Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how a component configures other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it. - -```yaml -name: - option-1: value - option-2: value -``` - -It is the entry's `name` that is used for component resolution. It can be one of the [built-in components](./built-in), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples. - -For some built-in components they can be configured with as little as a top-level boolean; for example, the [rest](./built-in#rest) extension can be enabled with just: - -```yaml -rest: true -``` - -Other components (built-in or custom), will generally have more configuration options. 
Some options are ubiquitous to the Harper platform, such as the `files`, `path`, and `root` options for a [Resource Extension](#resource-extension-configuration), or `package` for a [custom component](#custom-component-configuration). Additionally, [custom options](#protocol-extension-configuration) can be defined for [Protocol Extensions](#protocol-extension). - -### Custom Component Configuration - -Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`: - -```json -{ - "dependencies": { - "@harperdb/nextjs": "^1.0.0" - } -} -``` - -Then, within `config.yaml` it can be enabled and configured using: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - # ... -``` - -Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`: - -```json -{ - "dependencies": { - "harper-nextjs-test-feature": "HarperDB/nextjs#test-feature" - } -} -``` - -And now in `config.yaml`: - -```yaml -harper-nextjs-test-feature: - package: '@harperdb/nextjs' - files: '/*' - # ... -``` - -### Default Component Configuration - -Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components. - -```yaml -rest: true -graphql: true -graphqlSchema: - files: '*.graphql' -roles: - files: 'roles.yaml' -jsResource: - files: 'resources.js' -fastifyRoutes: - files: 'routes/*.js' - path: '.' -static: - files: 'web/**' -``` - -Refer to the [built-in components](./built-in) documentation for more information on these fields. - -If a `config.yaml` is defined, it will **not** be merged with the default config. 
- -## Extensions - -A Harper Extension is a extensible component that is intended to be used by other components. The built-in components [graphqlSchema](./built-in#graphqlschema) and [jsResource](./built-in#jsresource) are both examples of extensions. - -There are two key types of Harper Extensions: **Resource Extension** and **Protocol Extensions**. The key difference is a **Protocol Extensions** can return a **Resource Extension**. - -Functionally, what makes an extension a component is the contents of `config.yaml`. Unlike the Application Template referenced earlier, which specified multiple components within the `config.yaml`, an extension will specify an `extensionModule` option. - -- `extensionModule` - `string` - _required_ - A path to the extension module source code. The path must resolve from the root of the extension module directory. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`. - -If the extension is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `extensionModule: ./dist/index.js`) - -It is also recommended that all extensions have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since extensions are just JavaScript packages, they can do anything a JavaScript package can normally do. It can be written in TypeScript, and compiled to JavaScript. It can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). It can be published to npm. The possibilities are endless! - -Furthermore, what defines an extension separately from a component is that it leverages any of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs. The key is in the name, **extensions are extensible**. 
- -### Resource Extension - -A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in#jsresource) extension handles executing JavaScript files. - -Resource Extensions are comprised of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-path-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-path-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-path-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-path-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence. - -> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shutdown and turned back on. - -Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and `handleDirectory()` and `setupDirectory()` methods have identical function definitions (arguments and return value behavior). - -#### Resource Extension Configuration - -Any [Resource Extension](#resource-extension) can be configured with the `files`, `path`, and `root` options. These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods. - -- `files` - `string` - _required_ - Specifies the set of files and directories that should be handled by the component. Can be a glob pattern. 
-- `path` - `string` - _optional_ - Specifies the URL path to be handled by the component. -- `root` - `string` - _optional_ - Specifies the root directory for mapping file paths to the URLs. - -For example, to configure the [static](./built-in#static) component to server all files from `web` to the root URL path: - -```yaml -static: - files: 'web/**' - root: 'web' -``` - -Or, to configure the [graphqlSchema](./built-in#graphqlschema) component to load all schemas within the `src/schema` directory: - -```yaml -graphqlSchema: - files: 'src/schema/*.schema' -``` - -#### Resource Extension API - -In order for an extension to be classified as a Resource Extension it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. For example: - -```js -// ESM -export function handleFile() {} -export function setupDirectory() {} - -// or CJS -function handleDirectory() {} -function setupFile() {} - -module.exports = { handleDirectory, setupFile }; -``` - -When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead: - -```js -export function start() { - return { - handleFile() {}, - }; -} -``` - -##### `handleFile(contents, urlPath, path, resources): void | Promise` - -##### `setupFile(contents, urlPath, path, resources): void | Promise` - -These methods are for processing individual files. They can be async. - -> Remember! -> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. 
- -Parameters: - -- `contents` - `Buffer` - The contents of the file -- `urlPath` - `string` - The recommended URL path of the file -- `path` - `string` - The relative path of the file - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `void | Promise` - -##### `handleDirectory(urlPath, path, resources): boolean | void | Promise` - -##### `setupDirectory(urlPath, path, resources): boolean | void | Promise` - -These methods are for processing directories. They can be async. - -If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed. - -> Remember! -> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `urlPath` - `string` - The recommended URL path of the file -- `path` - `string` - The relative path of the directory - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `boolean | void | Promise` - -### Protocol Extension - -A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../../reference/globals#server) global API documentation for more information). - -#### Protocol Extension Configuration - -In addition to the `files`, `path`, and `root` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. 
Any options added to the extension configuration (in `config.yaml`), will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple option that can be included in its configuration. For example, a Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - files: '/*' - prebuilt: true - dev: false -``` - -Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../../reference/globals#server) global APIs accept `port` and `securePort` options, so components replicated this for simpler pass-through. - -#### Protocol Extension API - -A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_, and _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods have identical `options` object parameter, and can both return a Resource Extension (i.e. an object containing one or more of the methods listed above). - -##### `start(options): ResourceExtension | Promise` - -##### `startOnMainThread(options): ResourceExtension | Promise` - -Parameters: - -- `options` - `Object` - An object representation of the extension's configuration options. 
- -Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api) diff --git a/versioned_docs/version-4.5/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.5/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index c4254430..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created Harper will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." -} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. 
- -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.5/developers/operations-api/bulk-operations.md deleted file mode 100644 index aef33230..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into Harper - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running Harper - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/clustering-nats.md b/versioned_docs/version-4.5/developers/operations-api/clustering-nats.md deleted file mode 100644 index e6d9c0b2..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/clustering-nats.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering using NATS ---- - -# Clustering using NATS - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table -- - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../../reference/clustering/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `node_name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about [Harper clustering here](../../reference/clustering). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/versioned_docs/version-4.5/developers/operations-api/clustering.md b/versioned_docs/version-4.5/developers/operations-api/clustering.md deleted file mode 100644 index 8fc5ae49..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/clustering.md +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -The following operations are available for configuring and managing [Harper replication](../replication/). 
- -_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](./clustering-nats) _**documentation.**_ - -## Add Node - -Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided a fully replicating system will be created. [Learn more about adding nodes here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add -- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. This will allow the Harper default self-signed certificates to be accepted. Defaults to `true` -- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials -- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used everytime a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate based authorization is much more secure and safe, and avoids the need for storing credentials. Defaults to `false`. -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(optional)_ - The relationship created between nodes. 
If not provided a fully replicated cluster will be setup. Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate - - `table` - the table to replicate - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'server-two' to cluster" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance in the cluster. - -_Operation is restricted to super_user roles only_ - -_Note: will attempt to add the node if it does not exist_ - -- `operation` _(required)_ - must always be `update_node` -- `hostname` _(required)_ - the `hostname` of the remote node you are updating -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(required)_ - The relationship created between nodes. 
Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'server-two'" -} -``` - ---- - -## Remove Node - -Removes a Harper node from the cluster and stops replication, [Learn more about remove node here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are removing - -### Body - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'server-two' from cluster" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. - -`database_sockets` shows the actual websocket connections that exist between nodes. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "type": "cluster-status", - "connections": [ - { - "replicateByDefault": true, - "replicates": true, - "url": "wss://server-2.domain.com:9933", - "name": "server-2.domain.com", - "subscriptions": null, - "database_sockets": [ - { - "database": "data", - "connected": true, - "latency": 0.7, - "thread_id": 1, - "nodes": ["server-2.domain.com"], - "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", - "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", - "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", - "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" - } - ] - } - ], - "node_name": "server-1.domain.com", - "is_enabled": true -} -``` - -There is a separate socket for each database for each node. Each node is represented in the connections array, and each database connection to that node is represented in the `database_sockets` array. Additional timing statistics include: - -- `lastCommitConfirmed`: When a commit is sent out, it should receive a confirmation from the remote server; this is the last receipt of confirmation of an outgoing commit. -- `lastReceivedRemoteTime`: This is the timestamp of the transaction that was last received. The timestamp is from when the original transaction occurred. -- `lastReceivedLocalTime`: This is local time when the last transaction was received. If there is a different between this and `lastReceivedRemoteTime`, it means there is a delay from the original transaction to \* receiving it and so it is probably catching-up/behind. -- `sendingMessage`: The timestamp of transaction is actively being sent. This won't exist if the replicator is waiting for the next transaction to send. - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. 
Resets and replaces any existing clustering setup. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object following the `add_node` schema. - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password2" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "subscribe": true, - "publish": false - } - ] - }, - { - "hostname": "server-three", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password3" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Cluster Set Routes - -Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties. - -### Body - -```json -{ - "operation": "cluster_set_routes", - "routes": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets the replication routes from the Harper config file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -[ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } -] -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/components.md b/versioned_docs/version-4.5/developers/operations-api/components.md deleted file mode 100644 index 74740020..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/components.md +++ /dev/null @@ -1,512 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a predefined template. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -If deploying with the `payload` option, Harper will decrypt the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory. - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registerd packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. In addition to tags, you can also reference branches or commit numbers. 
Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have an installed SSH keys on the server: - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. Must be a string -- `restart` _(optional)_ - must be either a boolean or the string `rolling`. If set to `rolling`, a rolling restart will be triggered after the component is deployed, meaning that each node in the cluster will be sequentially restarted (waiting for the last restart to start the next). 
If set to `true`, the restart will not be rolling, all nodes will be restarted in parallel. If `replicated` is `true`, the restart operations will be replicated across the cluster. -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. -- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. This can be used to install dependencies with pnpm or yarn, for example, like: `"install_command": "npm install -g pnpm && pnpm install"` - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template", - "replicated": true -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. 
- -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete -- `replicated` _(optional)_ - if true, Harper will replicate the component deletion to all nodes in the cluster. Must be a boolean. -- `restart` _(optional)_ - if true, Harper will restart after dropping the component. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 
466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. 
Defaults to `utf8` -- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` - -## Add SSH Key - -Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_ssh_key` -- `name` _(required)_ - the name of the key -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` -- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. 
- _Operation is restricted to super_user roles only_ - -### Body - -```json -{ - "operation": "add_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nfake\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Added ssh key: harperdb-private-component" -} -``` - -### Generated Config and Deploy Component "package" string examples - -``` -#harperdb-private-component -Host harperdb-private-component.github.com - HostName github.com - User git - IdentityFile /hdbroot/ssh/harperdb-private-component.key - IdentitiesOnly yes -``` - -``` -"package": "git+ssh://git@:.git#semver:v1.2.3" - -"package": "git+ssh://git@harperdb-private-component.github.com:HarperDB/harperdb-private-component.git#semver:v1.2.3" -``` - -Note that `deploy_component` with a package uses `npm install` so the url must be a valid npm format url. The above is an example of a url using a tag in the repo to install. - -## Update SSH Key - -Updates the private key contents of an existing SSH key. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_ssh_key` -- `name` _(required)_ - the name of the key to be updated -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `replicated` _(optional)_ - if true, Harper will replicate the key update to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "update_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Updated ssh key: harperdb-private-component" -} -``` - -## Delete SSH Key - -Deletes a SSH key. This will also remove it from the generated SSH config. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_ssh_key` -- `name` _(required)_ - the name of the key to be deleted -- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "name": "harperdb-private-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Deleted ssh key: harperdb-private-component" -} -``` - -## List SSH Keys - -List off the names of added SSH keys - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_ssh_keys` - -### Body - -```json -{ - "operation": "list_ssh_keys" -} -``` - -### Response: 200 - -```json -[ - { - "name": "harperdb-private-component" - } -] -``` - -_Note: Additional SSH keys would appear as more objects in this array_ - -## Set SSH Known Hosts - -Sets the SSH known_hosts file. This will overwrite the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_ssh_known_hosts` -- `known_hosts` _(required)_ - The contents to set the known_hosts to. Line breaks must be delimite d with -- `replicated` _(optional)_ - if true, Harper will replicate the known hosts to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "set_ssh_known_hosts", - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Known hosts successfully set" -} -``` - -## Get SSH Known Hosts - -Gets the contents of the known_hosts file - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_ssh_known_hosts` - -### Body - -```json -{ - "operation": "get_ssh_known_hosts" -} -``` - -### Response: 200 - -```json -{ - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/custom-functions.md b/versioned_docs/version-4.5/developers/operations-api/custom-functions.md deleted file mode 100644 index 2c469bf4..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,281 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -:::warning Deprecated -These operations are deprecated. -::: - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDStudio uses this call to render the file content in its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. 
- -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string represenation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.5/developers/operations-api/databases-and-tables.md deleted file mode 100644 index 7c17fb4d..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts about 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. 
If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). - -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. 
- -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. 
The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/index.md b/versioned_docs/version-4.5/developers/operations-api/index.md deleted file mode 100644 index 09d2329f..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/index.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../../deployments/configuration#operationsapi), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](../../developers/security#basic-auth) or [JWT authentication](../../developers/security#jwt-auth). 
For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Clustering with NATS](operations-api/clustering-nats) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [Utilities](operations-api/utilities) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/jobs.md b/versioned_docs/version-4.5/developers/operations-api/jobs.md deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/jobs.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. 
- -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/logs.md 
b/versioned_docs/version-4.5/developers/operations-api/logs.md deleted file mode 100644 index 52e52740..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/logs.md +++ /dev/null @@ -1,732 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read Harper Log - -Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. 
By default, will maintain `hdb.log` order - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - 
"__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 
1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 
1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.5/developers/operations-api/nosql-operations.md deleted file mode 100644 index 11a99ab6..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,384 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `search_attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_value` _(required)_ - value you wish to search - wild cards are allowed -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "search_attribute": "owner_name", - "search_value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. 
The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `search_attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_type` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_value` _(required)_ - case-sensitive value you wish to search. If the `search_type` is `between` then use an array of two values to search between (both inclusive) - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "search_attribute": "age", - "search_type": "between", - "search_value": [5, 8] - }, - { - "search_attribute": "weight_lbs", - "search_type": "greater_than", - "search_value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "search_attribute": "adorable", - "search_type": "equals", - "search_value": true - }, - { - "search_attribute": "lovable", - "search_type": "equals", - "search_value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.5/developers/operations-api/quickstart-examples.md 
deleted file mode 100644 index a6c8f637..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using on the Harper Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/registration.md b/versioned_docs/version-4.5/developers/operations-api/registration.md deleted file mode 100644 index 
28c6a0e9..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/registration.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the Harper instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Get Fingerprint - -Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -Sets the Harper license as generated by Harper License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/sql-operations.md b/versioned_docs/version-4.5/developers/operations-api/sql-operations.md deleted file mode 100644 index 4b7076bb..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. 
The SELECT statement is used to query data from the database. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/token-authentication.md b/versioned_docs/version-4.5/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. - -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. 
- -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2
UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqkwsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.5/developers/operations-api/users-and-roles.md deleted file mode 100644 index 91f222b9..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. 
This overrides any individual table permissions for specified databases, or for all databases if the value is true. - -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. 
Harper will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your Harper instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.5/developers/operations-api/utilities.md b/versioned_docs/version-4.5/developers/operations-api/utilities.md deleted file mode 100644 index 6d24031c..00000000 --- a/versioned_docs/version-4.5/developers/operations-api/utilities.md +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: Utilities ---- - -# Utilities - -## Restart - -Restarts the Harper instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified Harper service. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` -- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` - ---- - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. - -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Install Node Modules - -:::warning Deprecated -This operation is deprecated, as it is handled automatically by [deploy_component](./components#deploy-component) and [restart](#restart). -::: - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must ba an array of custom functions projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` - ---- - -## Set Configuration - -Modifies the Harper configuration file parameters. Must follow with a [restart](#restart) or [restart_service](#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the Harper configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "replication": { - "hostname": "node1", - "databases": "*", - "routes": null, - "url": "wss://127.0.0.1:9925" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - 
"requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` - ---- - -## Add Certificate - -Adds or updates a certificate in the `hdb_certificate` system table. -If a `private_key` is provided it will **not** be stored in `hdb_certificate`, it will be written to file in `/keys/`. -If a `private_key` is not passed the operation will search for one that matches the certificate. If one is not found an error will be returned. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_certificate` -- `name` _(required)_ - a unique name for the certificate -- `certificate` _(required)_ - a PEM formatted certificate string -- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority -- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for -- `private_key` _(optional)_ - a PEM formatted private key string - -### Body - -```json -{ - "operation": "add_certificate", - "name": "my-cert", - "certificate": "-----BEGIN CERTIFICATE-----ZDFAay... -----END CERTIFICATE-----", - "is_authority": false, - "private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... -----END RSA PRIVATE KEY-----" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added certificate: my-cert" -} -``` - ---- - -## Remove Certificate - -Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_certificate` -- `name` _(required)_ - the name of the certificate - -### Body - -```json -{ - "operation": "remove_certificate", - "name": "my-cert" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed my-cert" -} -``` - ---- - -## List Certificates - -Lists all certificates in the `hdb_certificate` system table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_certificates` - -### Body - -```json -{ - "operation": "list_certificates" -} -``` - -### Response: 200 - -```json -[ - { - "name": "HarperDB-Certificate-Authority-node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... S34==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": true, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "serial_number": "5235345", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - }, - { - "name": "node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 
5bv==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": false, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", - "serial_number": "5243646", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - } -] -``` diff --git a/versioned_docs/version-4.5/developers/real-time.md b/versioned_docs/version-4.5/developers/real-time.md deleted file mode 100644 index 9c5c79e4..00000000 --- a/versioned_docs/version-4.5/developers/real-time.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -Harper is a database, not a generic broker, and therefore highly adept at handling _structured_ data. 
Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -Harper supports MQTT with its `mqtt` server module and Harper supports MQTT over standard TCP sockets or over WebSockets. This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). 
- -#### Capabilities - -Harper's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In Harper topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". - -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. 
The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -Harper supports QoS 0 and 1 for publishing and subscribing. - -Harper supports multi-level topics, both for subscribing and publishing. Harper also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -#### Events - -JavaScript components can also listen for MQTT events. This is available on the server.mqtt.events object. For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do: - -```javascript -server.mqtt.events.on('connected', (session, socket) => { - console.log('client connected with id', session.clientId); -}); -``` - -The following MQTT events are available: - -- `connection` - When a client initially establishes a TCP or WS connection to the server -- `connected` - When a client establishes an authorized MQTT connection -- `auth-failed` - When a client fails to authenticate -- `disconnected` - When a client disconnects from the server - -### Ordering - -Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. 
Harper is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, Harper does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). - -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as the being the latest and retained message (#2 in this case). 
- -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of a "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. 
For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | :heavy_check_mark: | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties retain handling | :heavy_check_mark: | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.5/developers/replication/index.md b/versioned_docs/version-4.5/developers/replication/index.md deleted file mode 100644 index fbe8d62c..00000000 --- 
a/versioned_docs/version-4.5/developers/replication/index.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -title: Replication/Clustering ---- - -# Replication/Clustering - -Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops. - -### Replication Overview - -Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe. - -### Replication Configuration - -To connect your nodes, you need to provide hostnames or URLs for the nodes to connect to each other. This can be done via configuration or through operations. To configure replication, you can specify connection information the `replication` section of the [harperdb-config.yaml](../../deployments/configuration). Here, you can specify the host name of the current node, and routes to connect to other nodes, for example: - -```yaml -replication: - hostname: server-one - routes: - - server-two - - server-three -``` - -In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. 
Routes to other nodes can also be configured with URLs or ports: - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9933 # URL based route - - hostname: server-three # define a hostname and port - port: 9933 -``` - -You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form, you would also need to provide the necessary credentials for the operation, see the section on securing connections for more details): - -```json -{ - "operation": "add_node", - "hostname": "server-two" -} -``` - -These operations will also dynamically generating certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node. - -Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally). - -By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases: - -```yaml -replication: - databases: - - data - - system -``` - -By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables. 
However, you can also configure replication for individual tables, and disable and exclude replication for specific tables in a database by setting `replicate` to `false` in the table definition: - -```graphql -type LocalTableForNode @table(replicate: false) { - id: ID! - name: String! -} -``` - -You can also control which nodes data is replicated to, and how many nodes data is replicated to. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated to with the [sharding configuration and APIs](replication/sharding). - -By default, replication connects to the secure port 9933. You can configure the replication port in the `replication` section. - -```yaml -replication: - securePort: 9933 -``` - -### Securing Connections - -Harper supports the highest levels of security through public key infrastructure based security and authorization. Depending on your security configuration, you can configure Harper in several different ways to build a connected cluster. - -#### Provide your own certificates - -If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or Digicert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config: - -``` -replication - enableRootCAs: true -``` - -And then just make sure the certificate’s common name (CN) matches the node's hostname. - -#### Setting Up Custom Certificates - -There are two ways to configure Harper with your own certificates: - -1. Use the `add_certificate` operation to upload them. -1. Or, specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file. 
- -If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key. If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.\ -\ -Example configuration: - -```yaml -tls: - certificate: /path/to/certificate.pem - certificateAuthority: /path/to/ca.pem - privateKey: /path/to/privateKey.pem -``` - -With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes. - -You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release. - -To enable the root certificates set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file: - -```yaml -replication: - enableRootCAs: true -``` - -#### Cross-generated certificates - -Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing. 
\ -\ -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection. - -If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process: - -- It creates a certificate signing request (CSR), sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA). -- The signed certificate is stored for future connections between the nodes, ensuring secure communication. - -**Important:** Your credentials are not stored—they are discarded immediately after use. - -You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates. - -Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer. - -#### Revoking Certificates - -Certificates used in replication can be revoked by using the certificate serial number and either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config in `harperdb-config.yaml`. - -To utilize the `revoked_certificates` attribute in the `hdb_nodes` table, you can use the `add_node` or `update_node` operation to add the certificate serial number to the `revoked_certificates` array. For example: - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "revoked_certificates": ["1769F7D6A"] -} -``` - -To utilize the replication route config in `harperdb-config.yaml`, you can add the certificate serial number to the `revokedCertificates` array. 
For example: - -```yaml -replication: - routes: - - hostname: server-three - port: 9930 - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -#### Removing Nodes - -Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. For example: - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -#### Insecure Connection IP-based Authentication - -You can completely disable secure connections and use IP addresses to authenticate nodes with each other. This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication within an insecure port, either by [configuring the operations API](../../deployments/configuration) to run on an insecure port or replication to run on an insecure port. And then set up IP-based routes to connect to other nodes: - -```yaml -replication: - port: 9933 - routes: - - 127.0.0.2 - - 127.0.0.3 -``` - -Note that in this example, we are using loop back addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development. - -#### Explicit Subscriptions - -#### Managing Node Connections and Subscriptions in Harper - -By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. But if you want more control, you can manage these connections manually by explicitly subscribing to nodes. This is useful for advanced configurations, testing, or debugging. - -#### Important Notes on Explicit Subscriptions - -If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. 
This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure. - -#### How to Subscribe to Nodes - -To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions. For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node. - -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": false - } - ] -} -``` - -To update an explicit subscription you can use the [`update_node` operation](./operations-api/clustering). - -Here we are updating the subscription to receive transactions on the `dev.my-table` table from the `server-two` node. - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ] -} -``` - -#### Monitoring Replication - -You can monitor the status of replication through the operations API. You can use the [`cluster_status` operation](./operations-api/clustering) to get the status of replication. For example: - -```json -{ - "operation": "cluster_status" -} -``` - -#### Database Initial Synchronization and Resynchronization - -When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. 
If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data. - -You may also specify a `start_time` in the `add_node` to specify that when a database connects, that it should not download the entire database, but only data since a given starting time. - -**Advanced Configuration** - -You can also check the configuration of the replication system, including the current known nodes and certificates, by querying the hdb_nodes and hdb_certificate table: - -```json -{ - "operation": "search_by_value", - "database": "system", - "table": "hdb_nodes", - "search_attribute": "name", - "search_value": "*" -} -``` diff --git a/versioned_docs/version-4.5/developers/replication/sharding.md b/versioned_docs/version-4.5/developers/replication/sharding.md deleted file mode 100644 index f22237b8..00000000 --- a/versioned_docs/version-4.5/developers/replication/sharding.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: Sharding ---- - -Harper's replication system supports various levels of replication or sharding. Harper can be configured or set up to replicate to different data to different subsets of nodes. This can be used facilitate horizontally scalability of storage and write performance, while maintaining optimal strategies of data locality and data consistency. When sharding is configured, Harper will replicate data to only a subset of nodes, based on the sharding configuration, and can then retrieve data from the appropriate nodes as needed to fulfill requests for data. - -There are two main ways to setup sharding in Harper. The approach is to use dynamic sharding, where the location or residency of records is determined dynamically based on where the record was written and record data, and records can be dynamically relocated based on where they are accessed. 
This residency information can be specific to each record, and can vary based on the computed residency and where the data is written and accessed. - -The second approach is define specific shards, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed, or content. This approach is more static, but can be more efficient for certain use cases, and means that the location of data can always be predictably determined based on the primary key. - -## Configuration For Dynamic Sharding - -By default, Harper will replicate all data to all nodes. However, replication can easily be configured for "sharding", or storing different data in different locations or nodes. The simplest way to configure sharding and limit replication to improve performance and efficiency is to configure a replication-to count. This will limit the number of nodes that data is replicated to. For example, to specify that writes should replicate to 2 other nodes besides the node that first stored the data, you can set the `replicateTo` to 2 in the `replication` section of the `harperdb-config.yaml` file: - -```yaml -replication: - replicateTo: 2 -``` - -This will ensure that data is replicated to two other nodes, so that each record will be stored on three nodes in total. - -With a sharding configuration (or customization below) in place, requests will for records that don't reside on the server handling requests will automatically be forwarded to the appropriate node. This will be done transparently, so that the client will not need to know where the data is stored. - -## Replication Control with Headers - -With the REST interface, replication levels and destinations can also specified with the `X-Replicate-To` header. This can be used to indicate the number of additional nodes that data should be replicated to, or to specify the nodes that data should be replicated to. 
The `X-Replicate-To` header can be used with the `POST` and `PUT` methods. This header can also specify if the response should wait for confirmation from other nodes, and how many, with the `confirm` parameter. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: 2;confirm=1 - -... -``` - -You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: node1,node2 -``` - -(This can also be used with the `confirm` parameter.) - -## Replication Control with Operations - -Likewise, you can specify replicateTo and confirm parameters in the operation object when using the Harper API. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following operation object: - -```jsonc -{ - "operation": "update", - "schema": "dev", - "table": "MyTable", - "hashValues": [3], - "record": { - "name": "John Doe", - }, - "replicateTo": 2, - "replicatedConfirmation": 1, -} -``` - -or you can specify nodes: - -```jsonc -{ - // ... - "replicateTo": ["node-1", "node-2"], - // ... -} -``` - -## Programmatic Replication Control - -Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. 
For example, you can define a put method: - -```javascript -class MyTable extends tables.MyTable { - put(record) { - const context = this.getContext(); - context.replicateTo = 2; // or an array of node names - context.replicatedConfirmation = 1; - return super.put(record); - } -} -``` - -## Configuration for Static Sharding - -Alternatively, you can configure static sharding, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key. The `shard` is identified by a number. To configure the shard for each node, you can specify the shard number in the `replication`'s `shard` in the configuration: - -```yaml -replication: - shard: 1 -``` - -Alternatively, you can configure the `shard` under the `replication` `routes`. This allows you to assign a specific shard id based on the routing configuration. - -```yaml -replication: - routes: - - hostname: node1 - shard: 1 - - hostname: node2 - shard: 2 -``` - -Or you can specify a `shard` number by including that property in an `add_node` operation or `set_node` operation, to dynamically assign a node to a shard. - -You can then specify shard number in the `setResidency` or `setResidencyById` functions below. - -## Custom Sharding - -You can also define a custom sharding strategy by specifying a function to compute the "residency" or location of where records should be stored and reside. To do this we use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the `id` field, you can use the following code: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 
['node1'] : ['node2']; -}); -``` - -With this approach, the record metadata, which includes the residency information, and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function. - -The `setResidency` function can alternately return a shard number, which will replicate the data to all the nodes in that shard: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 1 : 2; -}); -``` - -### Custom Sharding By Primary Key - -Alternately you can define a custom sharding strategy based on the primary key alone. This allows records to be retrieved without needing access to the record data or metadata. With this approach, data will only be replicated to the nodes specified by the residency function (the record metadata doesn't need to replicated to all nodes). To do this, you can use the `setResidencyById` method, providing a function that will determine the residency or shard of each record based on the primary key. The function you provide will be called with the primary key, and should return a `shard` number or an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the primary key, you can use the following code: - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? 1 : 2; // return shard number -}); -``` - -or - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? ['node1'] : ['node2']; // return array of node hostnames -}); -``` - -### Disabling Cross-Node Access - -Normally sharding allows data to be stored in specific nodes, but still allows access to the data from any node. However, you can also disable cross-node access so that data is only returned if is stored on the node where it is accessed. 
To do this, you can set the `replicateFrom` property on the context of operation to `false`: - -```jsonc -{ - "operation": "search_by_id", - "table": "MyTable", - "ids": [3], - "replicateFrom": false, -} -``` - -Or use a header with the REST API: - -```http -GET /MyTable/3 -X-Replicate-From: none -``` diff --git a/versioned_docs/version-4.5/developers/rest.md b/versioned_docs/version-4.5/developers/rest.md deleted file mode 100644 index ee44325b..00000000 --- a/versioned_docs/version-4.5/developers/rest.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: REST ---- - -# REST - -## REST - -Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](./applications/) and [defining schemas](./applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](./applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). 
-- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -### GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -#### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -##### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -#### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -#### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. Note that this will only work for properties that are declared in the schema. 
- -### PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -#### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`. - -### DELETE - -This can be used to delete a record or records. - -### `DELETE /my-resource/` - -This will delete a record with the given primary key. This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -### `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -### POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are expost\ed as endpoints, this also can be used to create new records. - -#### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. 
Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -### Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. 
For example, if we are looking for a listing date greater than `2017-03-08T09:00:00.000Z` we must escape the colons as `%3A`: - -``` -GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z -``` - -You can also search for attributes that start with a specific string, by using the == comparator and appending a `*` to the attribute value: - -```http -GET /Product/?name==Keyboard* -``` - -**Chained Conditions** - -You can also specify that a range condition must be met for a single attribute value by chaining conditions. This is done by omitting the name in the name-value pair. For example, to find products with a price between 100 and 200, you could write: - -```http -GET /Product/?price=gt=100<=200 -``` - -Chaining can be used to combined `gt` or `ge` with `lt` or `le` to specify a range of values. Currently, no other types of chaining are supported. - -Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters, to ensure operators are passed through without manipulation. - -Here is a full list of the supported FIQL-style operators/comparators: - -- `==`: equal -- `=lt=`: less than -- `=le=`: less than or equal -- `=gt=`: greater than -- `=ge=`: greater than or equal -- `=ne=`, !=: not equal -- `=ct=`: contains the value (for strings) -- `=sw=`, `==*`: starts with the value (for strings) -- `=ew=`: ends with the value (for strings) -- `=`, `===`: strict equality (no type conversion) -- `!==`: strict inequality (no type conversion) - -#### Unions - -Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. 
For example, to return any product with a rating of `5`
This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. -- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/references records. - -To get a list of product names with a category of software: - -```http -GET /Product/?category=software&select(name) -``` - -#### `limit(start,end)` or `limit(end)` - -This function specifies a limit on the number of records returned, optionally providing a starting offset. - -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -#### `sort(property)`, `sort(+property,-property,...)` - -This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If the multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties. 
- -For example, to sort by product name (in ascending order): - -```http -GET /Product?rating=gt=3&sort(+name) -``` - -To sort by rating in ascending order, then by price in descending order for products with the same rating: - -```http -GET /Product?sort(+rating,-price) -``` - -## Relationships - -Harper supports relationships in its data models, allowing for tables to define a relationship with data from other tables (or even itself) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even with ordered relationships). These relationships are defined in the schema, and then can easily be queried through chained attributes that act as "join" queries, allowing related attributes to be referenced in conditions and selected for returned results. - -### Chained Attributes and Joins - -To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. Most importantly, this provides Harper's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema: - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - brandId: ID @indexed - brand: Brand @relationship(from: "brandId") -} -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: "brandId") -} -``` - -And then you could query a product by brand name: - -```http -GET /Product/?brand.name=Microsoft -``` - -This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`. - -The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. 
In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that: - -```http -GET /Brand/?products.name=Keyboard -``` - -This would return any `Brand` with at least one product with a name `"Keyboard"`. Note, that both of these queries are effectively acting as an "INNER JOIN". - -#### Chained/Nested Select - -Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand) -``` - -We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand{name}) -``` - -Or to specify multiple sub-attributes, we can comma delimit them. Note that selects can "join" to another table without any constraint/filter on the related/joined table: - -```http -GET /Product/?name=Keyboard&select(name,brand{name,id}) -``` - -When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand. - -#### Many-to-many Relationships (Array of Foreign Keys) - -Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. 
For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, and each reseller can sell multiple products): - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - resellerIds: [ID] @indexed - resellers: [Reseller] @relationship(from: "resellerIds") -} -type Reseller @table { - id: ID @primaryKey - name: String - ... -} -``` - -The product record can then hold an array of the reseller ids. When the `resellers` property is accessed (either through code or through select, conditions), the array of ids is resolved to an array of reseller records. We can also query through the resellers relationships like with the other relationships. For example, to query the products that are available through the "Cool Shop": - -```http -GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id}) -``` - -One of the benefits of using an array of foreign key values is that this can be manipulated using standard array methods (in JavaScript), and the array can dictate an order to keys and therefore to the resulting records. For example, you may wish to define a specific order to the resellers and how they are listed (which comes first, last): - -```http -PUT /Product/123 -Content-Type: application/json - -{ "id": "123", "resellerIds": ["first-reseller-id", "second-reseller-id", "last-reseller-id"], -...} -``` - -#### Type Conversion - -Query parameters are simply text, so there are several features for converting parameter values to properly typed values for performing correct searches. For the FIQL comparators, which includes `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, `=le=`, the parser will perform type conversion, according to the following rules: - -- `name==null`: Will convert the value to `null` for searching. -- `name==123`: Will convert the value to a number _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). 
-- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion. - -#### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. 
- -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -#### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", data: "BEGIN:VCALENDAR\nVERSION:2.0\n... -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. 
This is also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/versioned_docs/version-4.5/developers/security/basic-auth.md b/versioned_docs/version-4.5/developers/security/basic-auth.md deleted file mode 100644 index 9bc0160c..00000000 --- a/versioned_docs/version-4.5/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -Harper uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -_**You do not need to log in separately. Basic Auth is added to each HTTP request like create_database, create_table, insert etc… via headers.**_ - -A header is added to each HTTP request. The header key is **"Authorization"**, the header value is **"Basic <<your username and password buffer token>>"** - -## Authentication in Harper Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for Harper. - -_Note: This function uses btoa. 
Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.5/developers/security/certificate-management.md b/versioned_docs/version-4.5/developers/security/certificate-management.md deleted file mode 100644 index 43209e05..00000000 --- a/versioned_docs/version-4.5/developers/security/certificate-management.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for Harper external facing APIs. For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. 
- -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -By default Harper will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable Harper HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set Harper will default to the values in the `tls` section. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### mTLS - -Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section: - -```yaml - -http: - mtls: true - ... 
-tls: - certificateAuthority: ~/hdb/keys/ca.pem - ... -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.5/developers/security/configuration.md b/versioned_docs/version-4.5/developers/security/configuration.md deleted file mode 100644 index 19251614..00000000 --- a/versioned_docs/version-4.5/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper. 
- -## CORS - -Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains. If you need to disable CORS completely or set up an access list of domains, you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in `<ROOTPATH>`, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -Harper provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -This default port can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. 
Use operation `harperdb restart` from Harper Operations API.** diff --git a/versioned_docs/version-4.5/developers/security/index.md b/versioned_docs/version-4.5/developers/security/index.md deleted file mode 100644 index 723db452..00000000 --- a/versioned_docs/version-4.5/developers/security/index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Security ---- - -# Security - -Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [mTLS Authentication](security/mtls-auth) -- [Configuration](security/configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.5/developers/security/jwt-auth.md b/versioned_docs/version-4.5/developers/security/jwt-auth.md deleted file mode 100644 index 832373e4..00000000 --- a/versioned_docs/version-4.5/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -Harper uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. 
- -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokensoperation` is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.5/developers/security/mtls-auth.md b/versioned_docs/version-4.5/developers/security/mtls-auth.md deleted file mode 100644 index 375ec927..00000000 --- a/versioned_docs/version-4.5/developers/security/mtls-auth.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -Harper supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. diff --git a/versioned_docs/version-4.5/developers/security/users-and-roles.md b/versioned_docs/version-4.5/developers/security/users-and-roles.md deleted file mode 100644 index 1bf0b91b..00000000 --- a/versioned_docs/version-4.5/developers/security/users-and-roles.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in Harper - -Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. 
At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within Harper. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a Harper instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. 
- -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. - -Example JSON for `add_role` request - -```jsonc -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true, - }, - ], - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [], - }, - }, - }, - }, -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. 
If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles), blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). - -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1.
If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2). - - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement.
- -## Role-Based Operation Restrictions - -The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles. - -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| 
drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | :-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed Harper as `<>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. 
User_b may not have access to the hdb files Harper needs. This also keeps Harper more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. diff --git a/versioned_docs/version-4.5/index.mdx b/versioned_docs/version-4.5/index.mdx deleted file mode 100644 index 105f0eb6..00000000 --- a/versioned_docs/version-4.5/index.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Harper Docs ---- - -import CustomDocCardList from '@site/src/components/CustomDocCardList'; - -# Harper Docs - -:::info - -### Get the Most Out of Harper - -Join our Discord to access expert support, collaborate with Harper’s core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -Welcome to the Harper Documentation! Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. - -## Getting Started - -The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. 
- -## Building with Harper - - diff --git a/versioned_docs/version-4.5/reference/_category_.json b/versioned_docs/version-4.5/reference/_category_.json deleted file mode 100644 index 1a36ae90..00000000 --- a/versioned_docs/version-4.5/reference/_category_.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "link": { - "type": "doc", - "id": "reference/index" - } -} diff --git a/versioned_docs/version-4.5/reference/analytics.md b/versioned_docs/version-4.5/reference/analytics.md deleted file mode 100644 index 0ab51c67..00000000 --- a/versioned_docs/version-4.5/reference/analytics.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -Harper provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the Harper analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688594000000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these (milliseconds since epoch) can be queried from the `hdb_analytics` table.
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -The following are general resource usage statistics that are tracked: - -- `memory` - This includes RSS, heap, buffer and external data usage. -- `utilization` - How much of the time the worker was processing requests. -- `mqtt-connections` - The number of MQTT connections. - -The following types of information are tracked for each HTTP request: - -- `success` - How many requests returned a successful response (20x response code). TTFB - Time to first byte in the response to the client. -- `transfer` - Time to finish the transfer of the data to the client. -- `bytes-sent` - How many bytes of data were sent to the client. - -Requests are categorized by operation name, for the operations API, by the resource (name) with the REST API, and by command for the MQTT interface. diff --git a/versioned_docs/version-4.5/reference/architecture.md b/versioned_docs/version-4.5/reference/architecture.md deleted file mode 100644 index 4155d5ff..00000000 --- a/versioned_docs/version-4.5/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -Harper's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services.
- -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.5/reference/blob.md b/versioned_docs/version-4.5/reference/blob.md deleted file mode 100644 index 57dd7081..00000000 --- a/versioned_docs/version-4.5/reference/blob.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Blob ---- - -# Blob - -Blobs are binary large objects that can be used to store any type of unstructured/binary data and is designed for large content. Blobs support streaming and feature better performance for content larger than about 20KB. Blobs are built off the native JavaScript `Blob` type, and HarperDB extends the native `Blob` type for integrated storage with the database. To use blobs, you would generally want to declare a field as a `Blob` type in your schema: - -```graphql -type MyTable { - id: Any! @primaryKey - data: Blob -} -``` - -You can then create a blob which writes the binary data to disk, and can then be included (as a reference) in a record. 
For example, you can create a record with a blob like: - -```javascript -let blob = createBlob(largeBuffer); -await MyTable.put({ id: 'my-record', data: blob }); -``` - -The `data` attribute in this example is a blob reference, and can be used like any other attribute in the record, but it is stored separately, and the data must be accessed asynchronously. You can retrieve the blob data with the standard `Blob` methods: - -```javascript -let buffer = await blob.bytes(); -``` - -If you are creating a resource method, you can return a `Response` object with a blob as the body: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - let record = super.get(target); - return { - status: 200, - headers: {}, - body: record.data, // record.data is a blob - }; - } -} -``` - -When using the exported REST APIs for your tables, blobs will by default be treated with a UTF-8 encoding and contain text/plain content. - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "Why hello there, world!" - }' -``` - -To store arbitrary binary content (such as audio data) in a blob, using CBOR is recommended when making API requests. This will let you control the contents of the blob precisely. - -If you need to use JSON, Base64 encoding your contents can be a great choice, but you'll need to do a bit of work to control the encoding of the underlying blob: - -```typescript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; - - create(target: RequestTarget, record: Partial) { - if (record.data) { - record.data = Buffer.from(record.data, 'base64'); - } - return super.create(target, record); - } -} -``` - -Now you can create records and they'll be encoded appropriately. 
For example, here's a small .jpg encoded in base64: - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAVgEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAeQAAAHAAAABDAyMjGRAQAHAAAABAECAwCgAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAAABCgAwAEAAAAAQAAABCkBgADAAAAAQAAAAAAAAAAAAD/2wCEAAEBAQEBAQIBAQIDAgICAwQDAwMDBAYEBAQEBAYHBgYGBgYGBwcHBwcHBwcICAgICAgJCQkJCQsLCwsLCwsLCwsBAgICAwMDBQMDBQsIBggLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLC//dAAQAAf/AABEIABAAEAMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APz68CaN8Mp/DWveJviDqE0R0qGIwWsGEaR532J83uwwABXH+MtP8N6Hryad4cvJrm3lgjlX7WES4R2zujcIAvy8YIHQ+1eYeKdAu9VtTNpUvk3aAeWSxCblOVJA4O08jIrR0/R1txDc37m4u0QK8p7tjkgdBmv2zD4apGvUq1KjcXtHTTRWP0nEUqzxcatKbUEkuWy5fN3+Lmvt0tp2t//Z" - }' -``` - -One of the important characteristics of blobs is they natively support asynchronous streaming of data. This is important for both creation and retrieval of large data. When we create a blob with `createBlob`, the returned blob will create the storage entry, but the data will be streamed to storage. This means that you can create a blob from a buffer or from a stream. You can also create a record that references a blob before the blob is fully written to storage. 
For example, you can create a blob from a stream: - -```javascript -let blob = createBlob(stream); -// at this point the blob exists, but the data is still being written to storage -await MyTable.put({ id: 'my-record', data: blob }); -// we now have written a record that references the blob -let record = await MyTable.get('my-record'); -// we now have a record that gives us access to the blob. We can asynchronously access the blob's data or stream the data, and it will be available as the stream is written to the blob. -let stream = record.data.stream(); -``` - -This can be powerful functionality for large media content, where content can be streamed into storage as it is streamed out in real-time to users as it is received, or even for web content where low latency transmission of data from origin is critical. However, this also means that blobs are _not_ atomic or [ACID](https://en.wikipedia.org/wiki/ACID) compliant; streaming functionality achieves the opposite behavior of ACID/atomic writes that would prevent access to data as it is being written, and wait until data is fully available before a commit. Alternately, we can also use the `saveBeforeCommit` flag to indicate that the blob should be fully written to storage before committing a transaction to ensure that the whole blob is available before the transaction commits and writes the record: - -```javascript -let blob = createBlob(stream, { saveBeforeCommit: true }); -// this put will not commit and resolve until the blob is written and then the record is written -await MyTable.put({ id: 'my-record', data: blob }); -``` - -Note that using `saveBeforeCommit` does not necessarily guarantee full ACID compliance. This can be combined with the `flush` flag to provide a stronger guarantee that a blob is flushed to disk before committing a transaction.
However, the error handling below provides a stronger guarantee of proper blob handling when the process of streaming/writing a blob is interrupted and using proper error handling is recommended, instead of relying on `saveBeforeCommit`, for the best combination of reliability and performance. - -### Error Handling - -Because blobs can be streamed and referenced prior to their completion, there is a chance that an error or interruption could occur while streaming data to the blob (after the record is committed). We can create an error handler for the blob to handle the case of an interrupted blob: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - const record = super.get(target); - let blob = record.data; - blob.on('error', () => { - // if this was a caching table, we may want to invalidate or delete this record: - MyTable.invalidate(target); - // we may want to re-retrieve the blob - }); - return { - status: 200, - headers: {}, - body: blob - }; - } -} -``` - -### Blob `size` - -Blobs that are created from streams may not have the standard `size` property available, because the size may not be known while data is being streamed. Consequently, the `size` property may be undefined until the size is determined. You can listen for the `size` event to be notified when the size is available: - -```javascript -let record = await MyTable.get('my-record'); -let blob = record.data; -blob.size; // will be available if it was saved with a known size -let stream = blob.stream(); // start streaming the data -if (blob.size === undefined) { - blob.on('size', (size) => { - // will be called once the size is available - }); -} -``` - -### Blob Coercion - -When a field is defined to use the `Blob` type, any strings or buffers that are assigned to that field in a `put`, `patch`, or `publish`, will automatically be coerced to a `Blob`.
This makes it easy to use a `Blob` type even with JSON data that may come from HTTP request bodies or MQTT messages, that do not natively support a `Blob` type. - -See the [configuration](../deployments/configuration) documentation for more information on configuring where blobs are stored. diff --git a/versioned_docs/version-4.5/reference/clustering/certificate-management.md b/versioned_docs/version-4.5/reference/clustering/certificate-management.md deleted file mode 100644 index 43839a4b..00000000 --- a/versioned_docs/version-4.5/reference/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that Harper generates are stored in your `/keys/`. - -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your Harper cluster. This will let you generate certificates with CNs that match the FQDN of your nodes.
- -Once you generate new certificates, to make Harper start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart Harper. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. 
Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.5/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.5/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 0a8b2a6c..00000000 --- a/versioned_docs/version-4.5/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.5/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.5/reference/clustering/enabling-clustering.md deleted file mode 100644 index 606bc29c..00000000 --- a/versioned_docs/version-4.5/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. 
Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.5/reference/clustering/establishing-routes.md b/versioned_docs/version-4.5/reference/clustering/establishing-routes.md deleted file mode 100644 index e4a0ea89..00000000 --- a/versioned_docs/version-4.5/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with.
-
-`host` - the host of the remote instance you are creating the connection with. This can be an IP address or a URL.
-
-Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`.
-
-```yaml
-clustering:
-  hubServer:
-    cluster:
-      network:
-        routes:
-          - host: 3.62.184.22
-            port: 9932
-          - host: 3.735.184.8
-            port: 9932
-```
-
-![figure 1](/img/v4.5/clustering/figure1.png)
-
-This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes.
-
-There are multiple ways to set routes, they are:
-
-1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above).
-1. Calling `cluster_set_routes` through the API.
-
-```json
-{
-  "operation": "cluster_set_routes",
-  "server": "hub",
-  "routes": [{ "host": "3.735.184.8", "port": 9932 }]
-}
-```
-
-_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._
-
-1. From the command line.
-
-```bash
---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]"
-```
-
-1. Using environment variables.
-
-```bash
-CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}]
-```
-
-The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes.
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.5/reference/clustering/index.md b/versioned_docs/version-4.5/reference/clustering/index.md deleted file mode 100644 index fddd3851..00000000 --- a/versioned_docs/version-4.5/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: NATS Clustering ---- - -# NATS Clustering - -Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. - -Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. 
-- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings.
-
-Harper simplifies the architecture of such an application with its bi-directional, table-level replication:
-
-- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds.
-- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so.
-- When a threshold violation occurs, the application adds a record to the "alerts" table.
-- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation.
-- The edge instance publishes the "alerts" table up to the cloud instance.
-
-By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces.
diff --git a/versioned_docs/version-4.5/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.5/reference/clustering/managing-subscriptions.md
deleted file mode 100644
index f043c9d1..00000000
--- a/versioned_docs/version-4.5/reference/clustering/managing-subscriptions.md
+++ /dev/null
@@ -1,199 +0,0 @@
----
-title: Managing subscriptions
----
-
-Tables are replicated when the table is designated as replicating and there is a subscription between the nodes.
-Tables are designated as replicating by default, but can be changed by setting `replicate` to `false` in the table definition:
-
-```graphql
-type Product @table(replicate: false) {
-  id: ID!
-  name: String!
-}
-```
-
-Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases
-should be replicated by default:
-
-```yaml
-replication:
-  databases: data
-```
-
-If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate:
-
-```graphql
-type Product @table(replicate: true) {
-  id: ID!
-  name: String!
-}
-```
-
-Reading hdb_nodes (what we do _to_ the node, not what the node does).
-
-The subscription can be set to publish, subscribe, or both.
-
-# Managing subscriptions
-
-Subscriptions can be added, updated, or removed through the API.
-
-_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._
-
-To add a single node and create one or more subscriptions use `set_node_replication`.
-
-```json
-{
-  "operation": "set_node_replication",
-  "node_name": "Node2",
-  "subscriptions": [
-    {
-      "database": "data",
-      "table": "dog",
-      "publish": false,
-      "subscribe": true
-    },
-    {
-      "database": "data",
-      "table": "chicken",
-      "publish": true,
-      "subscribe": true
-    }
-  ]
-}
-```
-
-This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken.
-
-To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/updated while the others will be left untouched.
-
-```json
-{
-  "operation": "set_node_replication",
-  "node_name": "Node2",
-  "subscriptions": [
-    {
-      "schema": "dev",
-      "table": "dog",
-      "publish": true,
-      "subscribe": true
-    }
-  ]
-}
-```
-
-This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change.
- -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. 
- -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.5/reference/clustering/naming-a-node.md b/versioned_docs/version-4.5/reference/clustering/naming-a-node.md deleted file mode 100644 index 7a512efb..00000000 --- a/versioned_docs/version-4.5/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. 
Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.5/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.5/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 22bc3977..00000000 --- a/versioned_docs/version-4.5/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of Harper running. - -\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster. diff --git a/versioned_docs/version-4.5/reference/clustering/subscription-overview.md b/versioned_docs/version-4.5/reference/clustering/subscription-overview.md deleted file mode 100644 index 12a929f6..00000000 --- a/versioned_docs/version-4.5/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. 
_Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.5/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.5/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.5/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.5/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. 
diff --git a/versioned_docs/version-4.5/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.5/reference/clustering/things-worth-knowing.md deleted file mode 100644 index cd81f348..00000000 --- a/versioned_docs/version-4.5/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -Harper clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. 
- -![](/img/v4.5/clustering/figure6.png) diff --git a/versioned_docs/version-4.5/reference/content-types.md b/versioned_docs/version-4.5/reference/content-types.md deleted file mode 100644 index b7d223f4..00000000 --- a/versioned_docs/version-4.5/reference/content-types.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -Harper supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations. - -:::tip Need a custom content type? - -Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples. - -::: - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and Harper's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/versioned_docs/version-4.5/reference/data-types.md b/versioned_docs/version-4.5/reference/data-types.md deleted file mode 100644 index 8dd902e6..00000000 --- a/versioned_docs/version-4.5/reference/data-types.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allows for all of Harper supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and is used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. 
-
-## Number
-
-Numbers can be stored as signed integers up to 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type names are supported:
-
-- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double")
-- `Int` - Any integer from -2147483648 to 2147483647
-- `Long` - Any integer from -9007199254740992 to 9007199254740992
-- `BigInt` - Any integer (negative or positive) with less than 300 digits
-
-Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately.
-
-## Object/Map
-
-Objects, or maps, that hold a set of named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in Harper’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data).
-
-## Array
-
-Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties.
-
-## Null
-
-A null value can be stored in Harper property values as well.
-
-## Date
-
-Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`.
- -## Binary Data - -Binary data can be stored in property values as well, with two different data types that are available: - -### Bytes - -JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -### Blobs - -Binary data can also be stored with [`Blob`s](./blob), which can scale much better for larger content than `Bytes`, as it is designed to be streamed and does not need to be held entirely in memory. It is recommended that `Blob`s are used for content larger than 20KB. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/versioned_docs/version-4.5/reference/dynamic-schema.md b/versioned_docs/version-4.5/reference/dynamic-schema.md deleted file mode 100644 index 3c44d5af..00000000 --- a/versioned_docs/version-4.5/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. 
Harper tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -Harper databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -Harper tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in Harper operations API. - -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](./storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. 
- -**Audit Attributes** - -Harper automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. - -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.5/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.5/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. 
- -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.5/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.5/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.5/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.5/reference/globals.md b/versioned_docs/version-4.5/reference/globals.md deleted file mode 100644 index 05d899c9..00000000 --- a/versioned_docs/version-4.5/reference/globals.md +++ /dev/null @@ -1,399 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with Harper is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. 
To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -## `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! 
- -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... -} -``` - -## `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -## `Resource` - -This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](./resource) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. 
See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP, networking, and authentication services. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -### `Request` and `Response` - -The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standard-based APIs to facilitate reuse with modern web code. While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resource) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). 
However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the Harper server environment: - -#### `Request` - -A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties: - -- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. -- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. -- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. -- `protocol` - This is the protocol of the request, like `http` or `https`. -- `data` - This is the deserialized body of the request (based on the type of data specified by `Content-Type` header). -- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). -- `host` - This is the host of the request, like `example.com`. -- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`.
This is generally most helpful in a cache resolution function, where you can send hints _if_ the data is not in the cache and is resolving from an origin: - -```javascript -class Origin { - async get(request) { - // if we are fetching data from origin, send early hints - this.getContext().requestContext.sendEarlyHints(''); - let response = await fetch(request); - ... - } -} -Cache.sourcedFrom(Origin); -``` - -- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that sends the cookie in requests will be authenticated as the user that logged in and the session record will be attached to the request. This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. -- `session` - This is the session object that is associated with current cookie-maintained session. This object is used to store session data for the current session. This is `Table` record instance, and can be updated by calling `request.session.update({ key: value })` or session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. -- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depends on the layered `Request` call with `Response` return pattern. 
-- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged and can cause problems for middleware, should only be used if you are certain that other server handlers will not attempt to return a different `Response` object. - -#### `Response` - -REST methods can directly return data that is serialized and returned to users, or it can return a `Response` object (or a promise to a `Response`), or it can return a `Response`-like object with the following properties (or again, a promise to it): - -- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. -- `data` - This is the data to be returned of the response. This will be serialized using Harper's content negotiation. -- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). - -#### `HttpOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](./globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. 
- -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. - -* The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -* The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](./globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the Harper Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through [analytics API](./analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. - -### `server.getUser(username): Promise` - -This returns the user object with permissions/authorization information based on the provided username. This does not verify the password, so it is generally used for looking up users by username. If you want to verify a user by password, use [`server.authenticateUser`](./globals#serverauthenticateuserusername-password-user). 
- -### `server.authenticateUser(username, password): Promise` - -This returns the user object with permissions/authorization information based on the provided username. The password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -### `server.resources: Resources` - -This provides access to the map of all registered resources. This is the central registry in Harper for registering any resources to be exported for use by REST, MQTT, or other components. Components that want to register resources should use the `server.resources.set(name, resource)` method to add to this map. Exported resources can be found by passing in a path to `server.resources.getMatch(path)` which will find any resource that matches the path or beginning of the path. - -#### `server.resources.set(name, resource, exportTypes?)` - -Register a resource with the server. For example: - -``` -class NewResource extends Resource { -} -server.resources.set('NewResource', NewResource); -// or limit usage: -server.resources.set('NewResource', NewResource, { rest: true, mqtt: false, 'my-protocol': true }); -``` - -#### `server.resources.getMatch(path, exportType?)` - -Find a resource that matches the path. For example: - -``` -server.resources.getMatch('/NewResource/some-id'); -// or specify the export/protocol type, to allow it to be limited: -server.resources.getMatch('/NewResource/some-id', 'my-protocol'); -``` - -### `server.contentTypes` - -Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. - -## `contentTypes` - -Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. - -HarperDB uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: - -1.
**Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage -2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format - -### Built-in Content Types - -HarperDB includes handlers for common formats: - -- **JSON** (`application/json`) -- **CBOR** (`application/cbor`) -- **MessagePack** (`application/msgpack`) -- **CSV** (`text/csv`) -- **Event-Stream** (`text/event-stream`) -- And more... - -### Custom Content Type Handlers - -You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). The map is keyed by MIME type, with values being handler objects containing these optional properties: - -#### Handler Properties - -- **`serialize(data: any): Buffer | Uint8Array | string`** - Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. - -- **`serializeStream(data: any): ReadableStream`** - Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. - -- **`deserialize(buffer: Buffer | string): any`** - Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. - -- **`deserializeStream(stream: ReadableStream): any`** - Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). - -- **`q: number`** _(default: 1)_ - Quality indicator between 0 and 1 representing serialization fidelity. Used in content negotiation to select the best format when multiple options are available. 
The server chooses the content type with the highest product of client quality × server quality values. - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` diff --git a/versioned_docs/version-4.5/reference/graphql.md b/versioned_docs/version-4.5/reference/graphql.md deleted file mode 100644 index 2dc0093d..00000000 --- a/versioned_docs/version-4.5/reference/graphql.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: GraphQL Querying ---- - -# GraphQL Querying - -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resource). - -Get started by setting `graphql: true` in `config.yaml`. - -This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. - -> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it fully implements neither that specification nor the [GraphQL](https://spec.graphql.org/) specification. - -Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body.
- -For example, to request the GraphQL Query: - -```graphql -query GetDogs { - Dog { - id - name - } -} -``` - -The `GET` request would look like: - -```http -GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D -Accept: application/graphql-response+json -``` - -And the `POST` request would look like: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDogs { Dog { id name } }" -} -``` - -> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. - -The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@exported` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resource#query) for more complex queries. - -Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: - -```graphql -query GetDogsAndOwners { - Dog { - id - name - breed - } - - Owner { - id - name - occupation - } -} -``` - -This will return all dogs and owners in the database. And is equivalent to executing two REST queries: - -```http -GET /Dog/?select(id,name,breed) -# and -GET /Owner/?select(id,name,occupation) -``` - -### Request Parameters - -There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` - -1. `query` - _Required_ - The string representation of the GraphQL document. - 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. - 1. i.e.
GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). - 1. If a shorthand, unnamed, or singular named query is provided, they will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. -1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter -1. `variables` - _Optional_ - A map of variable values to be used for the specified query - -### Type Checking - -The Harper GraphQL Querying system takes many liberties from the GraphQL specification. This extends to how it handles type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. - -In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. - -For example, the variable `$name: String!` states that `name` should be a non-null, string value. - -- If the request does not contain the `name` variable, an error will be returned -- If the request provides `null` for the `name` variable, an error will be returned -- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. -- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. - - If `null` is provided as the variable value, an error will still be returned. - - If the default value does not match the type specified (i.e. `$name: String!
= 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. -- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. - -The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. - -- Objects will be transformed into properly nested attributes -- Strings and Boolean values are passed through as their AST values -- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. -- List and Enums are not supported. - -### Fragments - -The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. - -For example, in the query - -```graphql -query Get { - Dog { - ...DogFields - } -} - -fragment DogFields on Dog { - name - breed -} -``` - -The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). - -You can literally specify anything in the fragment and it will behave the same way: - -```graphql -fragment DogFields on Any { ... } # this is recommended -fragment DogFields on Cat { ... } -fragment DogFields on Animal { ... } -fragment DogFields on LiterallyAnything { ... 
} -``` - -As an actual example, fragments should be used for composition: - -```graphql -query Get { - Dog { - ...sharedFields - breed - } - Owner { - ...sharedFields - occupation - } -} - -fragment sharedFields on Any { - id - name -} -``` - -### Short Form Querying - -Any attribute can be used as an argument for a query. In this short form, multiple arguments is treated as multiple equivalency conditions with the default `and` operation. - -For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. - -```graphql -query GetDog($id: ID!) { - Dog(id: $id) { - name - breed - owner { - name - } - } -} -``` - -And as a properly formed request: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}", - "variables": { - "id": "0" - } -} -``` - -The REST equivalent would be: - -```http -GET /Dog/?id==0&select(name,breed,owner{name}) -# or -GET /Dog/0?select(name,breed,owner{name}) -``` - -Short form queries can handle nested attributes as well. - -For example, return all dogs who have an owner with the name `"John"` - -```graphql -query GetDog { - Dog(owner: { name: "John" }) { - name - breed - owner { - name - } - } -} -``` - -Would be equivalent to - -```http -GET /Dog/?owner.name==John&select(name,breed,owner{name}) -``` - -And finally, we can put all of these together to create semi-complex, equality based queries! - -The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. - -```graphql -query GetDog($dogName: String!, $ownerName: String!) { - Dog(name: $dogName, owner: { name: $ownerName }) { - name - breed - owner { - name - } - } -} -``` - -### Long Form Querying - -> Coming soon! - -### Mutations - -> Coming soon! - -### Subscriptions - -> Coming soon! - -### Directives - -> Coming soon! 
diff --git a/versioned_docs/version-4.5/reference/headers.md b/versioned_docs/version-4.5/reference/headers.md deleted file mode 100644 index 5c85fc88..00000000 --- a/versioned_docs/version-4.5/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Harper Headers ---- - -# Harper Headers - -All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.5/reference/index.md b/versioned_docs/version-4.5/reference/index.md deleted file mode 100644 index 4c5d867a..00000000 --- a/versioned_docs/version-4.5/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for Harper. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.5/reference/limits.md b/versioned_docs/version-4.5/reference/limits.md deleted file mode 100644 index 97214620..00000000 --- a/versioned_docs/version-4.5/reference/limits.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Harper Limits ---- - -# Harper Limits - -This document outlines limitations of Harper. - -## Database Naming Restrictions - -**Case Sensitivity** - -Harper database metadata (database names, table names, and attribute/column names) are case sensitive. 
Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -Harper limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. - -## Primary Keys - -The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shortest). diff --git a/versioned_docs/version-4.5/reference/query-optimization.md b/versioned_docs/version-4.5/reference/query-optimization.md deleted file mode 100644 index 139b862b..00000000 --- a/versioned_docs/version-4.5/reference/query-optimization.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Query Optimization ---- - -## Query Optimization - -Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. It is important to understand how querying works to help you optimize your queries for the best performance. - -### Query Execution - -At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database and delivering the results based on required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. 
When a field is not indexed, a query specifies a condition on that field, and the database checks each potential record to determine if it matches the condition. - -When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default, an `and` operator, matching records must match all conditions), Harper will attempt to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data and then filter based on the remaining conditions. If a condition can search an indexed field, with a selective condition, it will be used before conditions that aren't indexed or aren't as selective. The `search` method includes an `explain` flag that can be used to return a query execution order to understand how the query is being executed. This can be useful for debugging and optimizing queries. - -For a union query, each condition is executed separately and the results are combined/merged. - -### Condition, Operators, and Indexing - -When a query is performed, the conditions specified in the query are evaluated against the data in the database. The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The use of these operators can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions with an indexed field, the database will have to check every record with a full table scan, which can be very slow for large datasets (it will get slower as the dataset grows, `O(n)`).
- -The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations. This is because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries. - -The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted list of values, so the greater than and less than operators will also utilize indexed fields when possible. If the range is narrow, these operations can be very fast. A wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it can quickly find the correct matching entries in the sorted index. On the other hand, the `contains`, `ends_with`, and not equal (`!=` or `not_equal`) operators cannot leverage the indexes, so they will require a full table scan to find the matching records if they are not used in conjunction with a selective/indexed condition. There is a special case of `!= null` which can use indexes to find non-null records. However, this is generally only helpful for sparse fields where a small subset are non-null values. More generally, operators are more efficient if they are selecting on fields with a high cardinality. - -Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes).
In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database, and doesn't require cross-referencing to the main records. - -### Relationships/Joins - -Harper supports relationships between tables, allowing for "join" queries. This does result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions which use relationships. Indexed fields and comparators that leverage the ordering are still valuable for performance. It is also important that if a condition on a table is connected to another table's foreign key, that foreign key also be indexed. Likewise, if a query `select`s data from a related table that uses a foreign key to relate, that foreign key should be indexed. The same principles of higher cardinality apply here as well: more unique values allow for efficient lookups. - -### Sorting - -Queries can also specify a sort order. This can also significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. A sort order used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as they do to conditions. Sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order. - -### Streaming - -One of the unique and powerful features of Harper's querying functionality is the ability to stream query results.
When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending the initial data before the entire query is complete (improving time-to-first-byte speed, for example). However, using a sort order on a query with conditions that are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits. diff --git a/versioned_docs/version-4.5/reference/resource.md b/versioned_docs/version-4.5/reference/resource.md deleted file mode 100644 index cc83541b..00000000 --- a/versioned_docs/version-4.5/reference/resource.md +++ /dev/null @@ -1,797 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information to during execution. 
Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. This general rule for how to interact with resources: - -- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initial access or write data. For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. - - You can subsequently use the instance methods on the returned resource instance to perform additional actions on the record. -- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. - -The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. - -The REST-based API is a little different than traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind, but semantics that attempt to guarantee no existing record or overwrite-only behavior require locks that don't scale well in distributed database. Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed eventually consistent database. 
You can generally think of CRUD operations mapping to REST operations like this: - -- Read - `get` -- Create with a known primary key - `put` -- Create with a generated primary key - `post`/`create` -- Update (Full) - `put` -- Update (Partial) - `patch` -- Delete - `delete` - -The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. Using a path that specifies an ID like `/MyResource/3492` will be mapped to a Resource instance where the instance's ID will be `3492`, and interactions will use the instance methods like `get()`, `put()`, and `post()`. Using the root path (`/MyResource/`) will map to a Resource instance with an ID of `null`, and this represents the collection of all the records in the resource or table. - -You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the Harper JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. 
For example: - -```javascript -export class MyExternalData extends Resource { - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -#### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! - -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... 
-} -``` - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -#### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../developers/components/reference#extensions). - -### `transaction` - -This provides a function for starting transactions. See the transactions section below for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. 
- -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query): Resource|void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. 
Therefore, calling `super.put(data)` updates this specific record/resource, not another records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object): Resource|void|Response` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?): Resource|void|Response` - -This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. 
The default `delete` method on tables (`super.delete(record)`) deletes the record from the table as part of the current transaction. - -### `publish(message): Resource|void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query): Resource|void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can iterate through with a `for await` loop.
It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. 
- -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. 
The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `addTo(property, value)` - -This adds to provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. 
-- `transaction` - The current transaction. If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). 
- -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. 
Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. 
The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). 
However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. 
The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. 
- -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include an `estimatedRange` array with the estimated range of the count. - -### `parsePath(path, context, query)` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string. 
You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an exported resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. 
We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). 
For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an `"or"` (records must match at least one condition). This is optional and defaults to `"and"`. 
- -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify a `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. 
For example, to just return an iterator of the `id`s of the objects: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. 
Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can direct access or modify properties through standard property syntax. For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property 
access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, if your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the `update()` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these changes will be saved - } -} -``` - -If you need to delete a property, you can do so with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get a "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. 
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.5/reference/roles.md b/versioned_docs/version-4.5/reference/roles.md deleted file mode 100644 index 2e3dc570..00000000 --- a/versioned_docs/version-4.5/reference/roles.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Roles ---- - -# Roles - -Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary. - -## Configuring Roles - -Point to a roles configuration file from your application’s `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -You can declare one or more files. Each file should define one or more roles in YAML format. 
- -## Roles File Structure - -A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain: - -- **super_user** – a boolean that grants all permissions. -- **databases** – one or more databases the role has access to. -- **tables** – within each database, table-level and attribute-level permissions. - -**Full Example** - -```yaml -: - super_user: # optional - : - : - read: - insert: - update: - delete: - attributes: - : - read: - insert: - update: -``` - -## Role Flags - -- `super_user: true` — grants full system access. -- `super_user: false` — the role only has the explicit permissions defined in the role. - -## Database and Table Permissions - -Within each role, you may specify one or more databases. Each database can declare permissions for tables. - -Example: - -```yaml -analyst: - super_user: false - data: - Sales: - read: true - insert: false - update: false - delete: false -``` - -In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database. - -## Attribute-Level Permissions - -You can also grant or deny access at the attribute level within a table. - -Example: - -```yaml -editor: - data: - Articles: - read: true - insert: true - update: true - attributes: - title: - read: true - update: true - author: - read: true - update: false -``` - -Here, the `editor` role can update the `title` of an article but cannot update the `author`. - -## Multiple Roles - -Roles can be defined side by side in a single file: - -```yaml -reader: - super_user: false - data: - Dog: - read: true - -writer: - super_user: false - data: - Dog: - insert: true - update: true -``` - -## Behavior on Startup - -- If a declared role does not exist, Harper creates it. -- If a declared role already exists, Harper updates its permissions to match the definition. -- Roles are enforced consistently across deployments, keeping access control in sync with your application code. 
diff --git a/versioned_docs/version-4.5/reference/sql-guide/date-functions.md b/versioned_docs/version-4.5/reference/sql-guide/date-functions.md deleted file mode 100644 index c9747dcd..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string) if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. - -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. 
- -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. 
- -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. - -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.5/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.5/reference/sql-guide/features-matrix.md deleted file mode 100644 index 7766faa4..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## SQL Features Matrix - -Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. 
- -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.5/reference/sql-guide/functions.md b/versioned_docs/version-4.5/reference/sql-guide/functions.md deleted file mode 100644 index 02fff906..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Harper SQL Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. 
Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Functions - -This SQL keywords reference contains the SQL functions available in Harper. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number of records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are no non-null values. | -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). 
- -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. | -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. 
If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. | -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. 
| -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. | - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. 
| - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. 
| - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. | -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. 
| - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. | -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. 
| - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. | diff --git a/versioned_docs/version-4.5/reference/sql-guide/index.md b/versioned_docs/version-4.5/reference/sql-guide/index.md deleted file mode 100644 index 52f245ab..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## Harper SQL Guide - -The purpose of this guide is to describe the available functionality of Harper as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. 
Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -Harper adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. - -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -Harper supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -Harper allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN -- LEFT INNER JOIN -- LEFT OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.5/reference/sql-guide/json-search.md b/versioned_docs/version-4.5/reference/sql-guide/json-search.md deleted file mode 100644 index 1c0c396b..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -Harper automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. 
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.5/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.5/reference/sql-guide/reserved-word.md deleted file mode 100644 index 2cd812ba..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: Harper SQL Reserved Words ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/versioned_docs/version-4.5/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.5/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index 0c56cf10..00000000 --- a/versioned_docs/version-4.5/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -2. Coordinates or GeoJSON geometries must be passed as a string when written directly in a SQL statement. -3. Note if you are using Postman for your testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. 
| - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. 
- -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. 
- -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. 
- -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain Harper Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. 
| - -### Example - -Find Harper Headquarters within all locations within the database. - -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. 
Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "Harper Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.5/reference/storage-algorithm.md b/versioned_docs/version-4.5/reference/storage-algorithm.md deleted file mode 100644 index c755adb2..00000000 --- a/versioned_docs/version-4.5/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The Harper storage algorithm is fundamental to the Harper core functionality, enabling the [Dynamic Schema](./dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within Harper. - -## Query Language Agnostic - -The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. 
Each Harper table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. Harper tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](./dynamic-schema) reflexively creates both the attribute and index as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## Harper Indexing Example (Single Table) - -![](/img/v4.5/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.5/reference/transactions.md b/versioned_docs/version-4.5/reference/transactions.md deleted file mode 100644 index 7e8546fb..00000000 --- a/versioned_docs/version-4.5/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. 
Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in Harper. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. 
However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. - -Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). 
- -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.6/administration/_category_.json b/versioned_docs/version-4.6/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.6/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.6/administration/administration.md b/versioned_docs/version-4.6/administration/administration.md deleted file mode 100644 index e0084bb5..00000000 --- a/versioned_docs/version-4.6/administration/administration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -Harper is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own Harper servers. 
- -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database Harper is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](administration/logging/audit-logging): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](./developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). 
- -### Horizontal Scaling with Node Cloning - -Harper provides rapid horizontal scaling capabilities through [node cloning functionality described here](administration/cloning). - -### Monitoring - -Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](./reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](./developers/operations-api/system-operations). -- Information about the current cluster configuration and status can be found in the [cluster APIs](./developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy visualize and monitor Harper with Graphana. - -### Replication Transaction Logging - -Harper utilizes NATS for replication, which maintains a transaction log. See the [transaction log documentation for information on how to query this log](administration/logging/transaction-logging). diff --git a/versioned_docs/version-4.6/administration/cloning.md b/versioned_docs/version-4.6/administration/cloning.md deleted file mode 100644 index 8a39cabd..00000000 --- a/versioned_docs/version-4.6/administration/cloning.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of Harper will create a clone of that -instance's config, databases and setup full replication. If it is run in a location where there is no existing Harper install, -it will, along with cloning, install Harper. 
If it is run in a location where there is another Harper instance, it will -only clone config, databases and replication that do not already exist. - -Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of Harper you are cloning. -**Clone node** - the new node which will be a clone of the leader node. - -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `REPLICATION_HOSTNAME` - _(optional)_ The clones replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--REPLICATION_HOSTNAME` - _(optional)_ The clones clustering host. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. 
If you want to run clone again set this value to `false`. If Harper is started with the clone variables -still present and `cloned` is true, Harper will just start as normal. - -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing `harperdb-config.yaml` file. - -More can be found in the Harper config documentation [here](../deployments/configuration). - -### Excluding database and components - -To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -``` - -_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. 
This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -### Cloning system database - -Harper uses a database called `system` to store operational information. Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. - -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Replication - -If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. - -If any databases are excluded from the clone, replication will not be set up on these databases. - -### JWT Keys - -If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. - -### Cloning overtop of an existing Harper instance - -Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set Harper config before the clone is created. To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install Harper with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. 
- -### Cloning steps - -Clone node will execute the following steps when ran: - -1. Look for an existing Harper install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true` clone will skip the clone logic and start Harper. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install Harper. An instance is classed as a fresh clone if there is no system database. -1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone. -1. Clone is complete, start Harper. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. - -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e REPLICATION_HOSTNAME=1.123.45.7 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. diff --git a/versioned_docs/version-4.6/administration/compact.md b/versioned_docs/version-4.6/administration/compact.md deleted file mode 100644 index 1a71db14..00000000 --- a/versioned_docs/version-4.6/administration/compact.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Compact ---- - -# Compact - -Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. 
- -There are two options that Harper offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, Harper is not running when performing this operation. - -This will copy a Harper database with compaction. If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database. - -This command should be run in the [CLI](../deployments/harper-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete. Under the hood it loops through all non-system databases, creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database to where the original one is located and remove any backups. 
- -Compact on start is initiated by config in `harperdb-config.yaml` - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/versioned_docs/version-4.6/administration/harper-studio/create-account.md b/versioned_docs/version-4.6/administration/harper-studio/create-account.md deleted file mode 100644 index c0b0cc96..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [Harper Studio sign up page](https://studio.harperdb.io/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -1. Review the Privacy Policy and Terms of Service. -1. Click the sign up for free button. -1. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. 
diff --git a/versioned_docs/version-4.6/administration/harper-studio/enable-mixed-content.md b/versioned_docs/version-4.6/administration/harper-studio/enable-mixed-content.md deleted file mode 100644 index 2784c191..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/enable-mixed-content.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -Enabling mixed content is required in cases where you would like to connect the Harper Studio to Harper Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -Note: If you want to connect insecure HTTP instances from the secure HTTPS Fabric Studio, you can enable mixed content temporarily. This isn't recommended in production systems. It would be better to add HTTPS / SSL Termination in front of your instances. But if you understand the risks, you can enabling mixed content is not recommended for production systems as it can expose users to security risks. - -## Steps to Connect to a Self-Hosted Harper Instance - -1. Log into [Harper Studio Cloud](https://fabric.harper.fast/). -2. Select an **organization**. -3. Click "+ New Cluster" on the top right corner. -4. In the "New Cluster" page, Fill out the required fields. - - Cluster Name - A name for your the cluster. - - Harper Deployment - Select "Self-Hosted" (This will allow you to connect to your self-hosted HarperDB instance). - - Support & Usage. - Keep the default option "Self Supported and Managed" - - Optional Cluster Load Balancer Host Name - This is an optional field. You can leave it blank. - - Instance - In the Dropdown, select whether your instance is `HTTP://` or `HTTPS://`. Unsure? Harper Instance by default is set to `HTTPS://` when installed in an instance. 
- - Host Name - The hostname or IP address of your HarperDB instance. Choose `localhost` if you are running the instance on the same machine you're accessing Harper Studio Cloud with. - - Port - The port number your HarperDB instance is listening on. Default is `9925`. - -5. Click "Create New Cluster". You have now successfully created a new cluster connecting to your local Harper Instance. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). diff --git a/versioned_docs/version-4.6/administration/harper-studio/index.md b/versioned_docs/version-4.6/administration/harper-studio/index.md deleted file mode 100644 index 7d7192fe..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Harper Studio ---- - -# Harper Studio - -Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, get started today! - -[Sign up for free!](https://studio.harperdb.io/sign-up) - -Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. It can be enabled in the [configuration file](../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? - -While Harper Studio is web based and hosted by us, all database interactions are performed on the Harper instance the studio is connected to. The Harper Studio loads in your browser, at which point you login to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. 
All database interactions are made via the Harper Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you. diff --git a/versioned_docs/version-4.6/administration/harper-studio/instance-configuration.md b/versioned_docs/version-4.6/administration/harper-studio/instance-configuration.md deleted file mode 100644 index 06a6eb89..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/instance-configuration.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click config in the instance control bar. 
- -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in Harper database user_ - -- Created Date (Harper Cloud only) - -- Region (Harper Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (Harper Cloud only) - -- Disk IOPS (Harper Cloud only) - -## Update Instance RAM - -Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. - -Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. 
- -_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -1. 
The instance will begin deleting immediately. - -## Restart Instance - -The Harper Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -1. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). diff --git a/versioned_docs/version-4.6/administration/harper-studio/instance-metrics.md b/versioned_docs/version-4.6/administration/harper-studio/instance-metrics.md deleted file mode 100644 index e9b48939..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The Harper Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance). 
- -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.6/administration/harper-studio/instances.md b/versioned_docs/version-4.6/administration/harper-studio/instances.md deleted file mode 100644 index 07da8097..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The Harper Studio allows you to administer all of your HarperDinstances in one place. Harper currently offers the following instance types: - -- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/). -- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any Harper installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. Harper Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. 
Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial Harper instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial Harper instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ _More on instance specs\_\_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._ _More on IOPS Impact on Performance\_\_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. 
- -## Register Enterprise Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Instance Password - - _The password of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Host - - _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the Harper instance. Harper defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. 
The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -Harper instances can be resized on the [Instance Configuration](./instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. 
- - _The username of a Harper user that is already configured in your Harper instance._ - -1. Enter the database password. - - _The password of a Harper user that is already configured in your Harper instance._ - -1. Click **Log In**. diff --git a/versioned_docs/version-4.6/administration/harper-studio/login-password-reset.md b/versioned_docs/version-4.6/administration/harper-studio/login-password-reset.md deleted file mode 100644 index 199d38ce..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your Harper Studio Account - -To log into your existing Harper Studio account: - -1. Navigate to the [Harper Studio](https://studio.harperdb.io/). -1. Enter your email address. -1. Enter your password. -1. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the Harper Studio password reset page. -1. Enter your email address. -1. Click **send password reset email**. -1. If the account exists, you will receive an email with a temporary password. -1. Navigate back to the Harper Studio login page. -1. Enter your email address. -1. Enter your temporary password. -1. Click **sign in**. -1. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the Harper Studio profile page. -1. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -1. Click the **Update Password** button. 
diff --git a/versioned_docs/version-4.6/administration/harper-studio/manage-applications.md b/versioned_docs/version-4.6/administration/harper-studio/manage-applications.md deleted file mode 100644 index 854b94b3..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/manage-applications.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can make edit individual files of your application directly in the Harper Studio. 
- -## Things to Keep in Mind - -To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation. - -When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the Harper logs, which can be found on the [status page](./instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your Harper instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instances is set to by navigating to the [instance config page](./instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. 
In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. - -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. 
Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost/application-template/getAll`. - -## Creating a New Application - -1. From the application page, click the "+ app" button at the top right. -1. Click "+ Create A New Application Using The Default Template". -1. Enter a name for your project, note project names must contain only alphanumeric characters, dashes and underscores. -1. Click OK. -1. Your project will be available in the applications file navigator on the left. Click a file to select a file to edit. - -## Editing an Application - -1. From the applications page, click the file you would like to edit from the file navigator on the left. -1. Edit the file with any changes you'd like. -1. Click "save" at the top right. Note, as mentioned above, when you save a file, the Harper Applications server will be restarted immediately. diff --git a/versioned_docs/version-4.6/administration/harper-studio/manage-databases-browse-data.md b/versioned_docs/version-4.6/administration/harper-studio/manage-databases-browse-data.md deleted file mode 100644 index 33482198..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/manage-databases-browse-data.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Manage Databases / Browse Data ---- - -# Manage Databases / Browse Data - -Manage instance databases/tables and browse data in tabular format with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **browse** in the instance control bar. - -Once on the instance browse page you can view data, manage databases and tables, add new data, and more. - -## Manage Databases and Tables - -#### Create a Database - -1. 
Click the plus icon at the top right of the databases section. -1. Enter the database name. -1. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -1. Identify the appropriate database to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired database from the databases section. -1. Click the plus icon at the top right of the tables section. -1. Enter the table name. -1. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -1. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -1. Click the minus icon at the top right of the tables section. -1. Identify the appropriate table to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -1. This expands the search filters. -1. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -1. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 1. Click **Import From URL**. - 1. The CSV will load, and you will be redirected back to browse table data. -1. To upload a CSV file: - 1. 
Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 1. Navigate to your desired CSV file and select it. - 1. Click **Insert X Records**, where X is the number of records in your CSV. - 1. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -1. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -1. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -1. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -1. Click the **delete icon**. -1. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. 
- -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.6/administration/harper-studio/manage-instance-roles.md b/versioned_docs/version-4.6/administration/harper-studio/manage-instance-roles.md deleted file mode 100644 index 3662013c..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -1. 
Enter the role name. - -1. Click the green check mark. - -1. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -1. Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -1. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be removed if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -1. Identify the appropriate role to delete and click the red minus sign in the same row. - -1. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.6/administration/harper-studio/manage-instance-users.md b/versioned_docs/version-4.6/administration/harper-studio/manage-instance-users.md deleted file mode 100644 index 1ba13518..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: - -1. 
Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -Harper instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](./manage-instance-roles)._ - -1. Click **Add User**. - -## Edit a User - -Harper instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -1. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 1. Click **Update Password**. - -1. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 1. Click **Update Role**. - -1. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 1. Click **Delete User**. diff --git a/versioned_docs/version-4.6/administration/harper-studio/manage-replication.md b/versioned_docs/version-4.6/administration/harper-studio/manage-replication.md deleted file mode 100644 index af69f9e1..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../reference/clustering/) first to gain a strong understanding of Harper clustering behavior. 
- -All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -1. Enter Cluster Password. -1. Review and/or Set Cluster Node Name. -1. Click **Enable Clustering**. - -At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you are presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. 
- -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -Harper Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -1. Identify the instance you would like to connect from the **unconnected instances** panel. - -1. Click the plus icon next to the appropriate instance. - -1. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -Harper Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -1. Click the minus icon next to the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing a channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -1. For publish, click the toggle switch in the **publish** column. - -1. For subscribe, click the toggle switch in the **subscribe** column. 
diff --git a/versioned_docs/version-4.6/administration/harper-studio/organizations.md b/versioned_docs/version-4.6/administration/harper-studio/organizations.md deleted file mode 100644 index f93eeff0..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique Harper Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the **Create a New Organization** card. -1. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -1. Click Create Organization. 
- -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Identify the proper organization card and click the trash can icon. -1. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -1. Click the **Do It** button. - -## Manage Users - -Harper Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. In the **add user** box, enter the new user’s email address. -1. Click **Add User**. - -Users may or may not already be Harper Studio users when adding them to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Toggle the **Is Owner** switch to the desired status. 
- ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -1. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_Harper billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -1. Click **Add Coupon**. -1. The coupon will then be available and displayed in the coupons panel. 
diff --git a/versioned_docs/version-4.6/administration/harper-studio/query-instance-data.md b/versioned_docs/version-4.6/administration/harper-studio/query-instance-data.md deleted file mode 100644 index e85f5e15..00000000 --- a/versioned_docs/version-4.6/administration/harper-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the Harper Studio with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **query** in the instance control bar. -1. Enter your SQL query in the SQL query window. -1. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. 
It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The Harper Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.6/administration/jobs.md b/versioned_docs/version-4.6/administration/jobs.md deleted file mode 100644 index c487f424..00000000 --- a/versioned_docs/version-4.6/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -Harper Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs use an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/bulk-operations#delete-records-before) - -[export_local](../developers/operations-api/bulk-operations#export-local) - -[export_to_s3](../developers/operations-api/bulk-operations#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the response contains the ID of that job which can be used to check on its status. 
- -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. - -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.6/administration/logging/audit-logging.md b/versioned_docs/version-4.6/administration/logging/audit-logging.md deleted file mode 100644 index 209b4981..00000000 --- a/versioned_docs/version-4.6/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging 
- -### Audit log - -The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart Harper for those changes to take place. Note, the audit log is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples can be found in the [Harper API documentation](../../developers/operations-api/logs). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. 
One thing of note is that the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.6/administration/logging/index.md b/versioned_docs/version-4.6/administration/logging/index.md deleted file mode 100644 index bde1870a..00000000 --- a/versioned_docs/version-4.6/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -Harper provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. 
-- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records. diff --git a/versioned_docs/version-4.6/administration/logging/standard-logging.md b/versioned_docs/version-4.6/administration/logging/standard-logging.md deleted file mode 100644 index 044c2260..00000000 --- a/versioned_docs/version-4.6/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the Harper application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. 
Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, lowering the log level will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of Harper and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. 
To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.6/administration/logging/transaction-logging.md b/versioned_docs/version-4.6/administration/logging/transaction-logging.md deleted file mode 100644 index 99222e42..00000000 --- a/versioned_docs/version-4.6/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. 
- -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering/). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.6/deployments/_category_.json b/versioned_docs/version-4.6/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.6/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.6/deployments/configuration.md b/versioned_docs/version-4.6/deployments/configuration.md deleted file mode 100644 index c58912c5..00000000 --- a/versioned_docs/version-4.6/deployments/configuration.md +++ /dev/null @@ -1,1235 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase, such as `operationsApi`. - -To change a configuration value, edit the `harperdb-config.yaml` file and save any changes. **HarperDB must be restarted for changes to take effect.** - -Alternatively, all configuration values can also be modified using environment variables, command line arguments, or the operations API via the [`set_configuration` operation](../developers/operations-api/configuration#set-configuration). - -For nested configuration elements, use underscores to represent parent-child relationships. When accessed this way, elements are case-insensitive. 
- -For example, to disable logging rotation in the `logging` section: - -```yaml -logging: - rotation: - enabled: false -``` - -You could apply this change using: - -- Environment variable: `LOGGING_ROTATION_ENABLED=false` -- Command line variable: `--LOGGING_ROTATION_ENABLED false` -- Operations API (`set_configuration`): `logging_rotation_enabled: false` - -To change the `port` in the `http` section, use: - -- Environment variable: `HTTP_PORT=<value>` -- Command line variable: `--HTTP_PORT <value>` -- Operations API (`set_configuration`): `http_port: <value>` - -To set the `operationsApi.network.port` to `9925`, use: - -- Environment variable: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variable: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Operations API (`set_configuration`): `operationsApi_network_port: 9925` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install Harper overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `<ROOTPATH>/harperdb-config.yaml` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread.
- -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization' - -A string representation of a comma separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests. 
- -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers. - -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. - -`requestQueueLimit` - _Type_: integer; _Default_: 20000 - -The maximum estimated request queue time, in milliseconds. When the queue is above this limit, requests will be rejected with a 503. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key. - -`http2` - _Type_: boolean; _Default_: false - -Enables HTTP/2 for the HTTP server. - -`timeout` - _Type_: integer; _Default_: 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default.
- -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -```yaml -http: - mtls: true -``` - -or - -```yaml -http: - mtls: - required: true - user: user-name -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. 
If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. `debug.host` - Specify the host interface to listen on `debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - -`heapSnapshotNearLimit` - _Type_: boolean; - -```yaml -threads: - heapSnapshotNearLimit: true -``` - -This specifies that a heap snapshot should be taken when the heap limit is near the limit. - ---- - -### `replication` - -The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances. - -```yaml -replication: - hostname: server-one - url: wss://server-one:9925 - databases: '*' - routes: - - wss://server-two:9925 - port: null - securePort: 9933, - enableRootCAs: true -``` - -`hostname` - _Type_: string; - -The hostname of the current Harper instance. - -`url` - _Type_: string; - -The URL of the current Harper instance. - -`databases` - _Type_: string/array; _Default_: "\*" (all databases) - -Configure which databases to replicate. This can be a string for all database or an array for specific databases. The list can be a simple array of database names: - -```yaml -replication: - databases: - - system - - data - - mydb -``` - -The database list can also specify databases that are purely "sharded" databases. 
For databases that are marked as sharded, replication will _only_ create database subscription connections to nodes in the same shard. Sharding can still function without this setting, since the residency location for sharding can be determined for each table or each record. However, using this setting will reduce the overhead of connections in situations where all data is uniformly sharded, creating a simpler and more efficient replication topology. To mark databases as sharded, you can specify a list of databases with a `name` and `sharded` flag: - -```yaml -replication: - databases: - - name: system - - name: data - sharded: true -``` - -`routes` - _Type_: array; - -An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties. - -`startTime` - _Type_: string; ISO formatted UTC date string. - -Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property. - -`revokedCertificates` - _Type_: array; - -An array of serial numbers of revoked certificates. If a connection is attempted with a certificate that is in this list, the connection will be rejected. - -```yaml -replication: - copyTablesToCatchUp: true - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 - startTime: 2024-02-06T15:30:00Z - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -`port` - _Type_: integer; - -The port to use for replication connections. - -`securePort` - _Type_: integer; _Default_: 9933 - -The port to use for secure replication connections. - -`enableRootCAs` - _Type_: boolean; _Default_: true - -When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time. 
- -`blobTimeout` - _Type_: number; _Default_: 120000 - -Amount of time to wait for a blob to be transferred before timing out, measured in milliseconds. - -`failOver` - _Type_: boolean; _Default_: true - -When true, Harper will attempt to fail-over to subscribing to a different node if the current node is unreachable, to reach consistency. - -`shard` - _Type_: integer; - -This defines the shard id of this instance and is used in conjunction with the [Table Resource functions](../developers/replication/sharding#custom-sharding) `setResidency` & `setResidencyById` to programmatically route traffic to the proper shard. - ---- - -### `clustering` using NATS - -The `clustering` section configures the NATS clustering engine, this is used to replicate data between instances of Harper. - -_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over Websockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._ - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. 
See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the Harper mesh network and discovery service. - -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. 
- -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. - -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. 
- -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. - -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of Harper threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. 
- -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this is to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected between every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, hub server will verify client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special Harper user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance. 
- -`enabled` - _Type_: boolean; _Default_: false - -Enables or disables the local studio. - -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Application/database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root` or `logging.path`). Many of the logging configuration properties can be set and applied without a restart (are dynamically applied). - -In addition, structured logging of data changes is also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enables table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: warn - -Control the verbosity of text event logs. - -```yaml -logging: - level: warn -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `error`.
- -`console` - _Type_: boolean; _Default_: true - -Controls whether console.log and other console.\* calls (as well as another JS components that writes to `process.stdout` and `process.stderr`) are logged to the log file. By default, these are not logged to the log file, but this can be enabled: - -```yaml -logging: - console: true -``` - -`root` - _Type_: string; _Default_: \/log - -The directory path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`path` - _Type_: string; _Default_: \/log/hdb.log - -The file path where the log file will be written. - -```yaml -logging: - root: ~/hdb/log/hdb.log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximates only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: true - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log Harper logs to the standard output and error streams. 
- -```yaml -logging: - stdStreams: false -``` - -`auditAuthEvents` - -`logFailed` - _Type_: boolean; _Default_: false - -Log all failed authentication events. - -_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -`logSuccessful` - _Type_: boolean; _Default_: false - -Log all successful authentication events. - -_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -```yaml -logging: - auditAuthEvents: - logFailed: false - logSuccessful: false -``` - -## Defining Separate Logging Configurations - -Harper's logger supports defining multiple logging configurations for different components in the system. Each logging configuration can be assigned its own `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. All logging defaults to the configuration of the "main" logger as configured above, but when logging is configured for different loggers, they will use their own configuration. Separate loggers can be defined: - -`logging.external` - -The `logging.external` section can be used to define logging for all external components that use the [`logger` API](../reference/globals). For example: - -```yaml -logging: - external: - level: warn - path: ~/hdb/log/apps.log -``` - -`http.logging` - -This section defines log configuration for HTTP logging. By default, HTTP requests are not logged, but defining this section will enable HTTP logging. Note that there can be substantive overhead to logging all HTTP requests. 
In addition to the standard logging configuration, the `http.logging` section also allows the following configuration properties to be set: - -- `timing` - This will log timing information -- `headers` - This will log the headers in each request (which can be very verbose) -- `id` - This will assign a unique id to each request and log it in the entry for each request. This is assigned as the `request.requestId` property and can be used to by other logging to track a request. - Note that the `level` will determine which HTTP requests are logged: -- `info` (or more verbose) - All HTTP requests -- `warn` - HTTP requests with a status code of 400 or above -- `error` - HTTP requests with a status code of 500 - -For example: - -```yaml -http: - logging: - timing: true - level: info - path: ~/hdb/log/http.log - ... rest of http config -``` - -`authentication.logging` - -This section defines log configuration for authentication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`mqtt.logging` - -This section defines log configuration for MQTT. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`replication.logging` - -This section defines log configuration for replication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`tls.logging` - -This section defines log configuration for TLS. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`storage.logging` - -This section defines log configuration for setting up and reading the database files. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. 
- -`storage.logging` - -This section defines log configuration for setting up and reading the database files. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`analytics.logging` - -This section defines log configuration for analytics. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in Harper. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). 
- -`refreshTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the Harper Operations API.\ -All the `operationsApi` configuration is optional. Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the Harper operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. 
- -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. 
This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. - -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: number; _Default_: null - -Path to a compression dictionary file - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. - -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting Harper, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). 
- -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/database` - -The `path` configuration sets where all database files should reside. - -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - -`blobPaths` - _Type_: string; _Default_: `/blobs` - -The `blobPaths` configuration sets where all the blob files should reside. This can be an array of paths, and if there are multiple, the blobs will be distributed across the paths. - -```yaml -storage: - blobPaths: - - /users/harperdb/big-storage -``` - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - -`reclamation` - -The reclamation section provides configuration for the reclamation process, which is responsible for reclaiming space when free space is low. For example: - -```yaml -storage: - reclamation: - threshold: 0.4 # Start storage reclamation efforts when free space has reached 40% of the volume space (default) - interval: 1h # Reclamation will run every hour (default) - evictionFactor: 100000 # A factor used to determine how much aggressively to evict cached entries (default) -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. 
By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. - -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. 
- -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. - -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). - -`mlts` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. 
When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority` - -This can define a specific path to use for the certificate authority. By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this. - -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. 
- -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. - -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../reference/components/applications#adding-components-to-root) package. This could be a remote git repo, a local folder/file or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. 
If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/versioned_docs/version-4.6/deployments/harper-cli.md b/versioned_docs/version-4.6/deployments/harper-cli.md deleted file mode 100644 index e559df01..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cli.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Harper CLI ---- - -# Harper CLI - -## Harper CLI - -The Harper command line interface (CLI) is used to administer [self-installed Harper instances](./install-harper/). - -### Installing Harper - -To install Harper with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](./configuration#using-the-configuration-file-and-naming-conventions). Note, when used in conjunction, command line arguments will override environment variables. 
- -**Environment Variables** - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -**Command Line Arguments** - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -### Starting Harper - -To start Harper after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -### Stopping Harper - -To stop Harper once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -### Restarting Harper - -To restart Harper once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -### Getting the Harper Version - -To check the version of Harper that is installed run the following command: - -```bash -harperdb version -``` - ---- - -### Renew self-signed certificates - -To renew the Harper generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -### Copy a database with compaction - -To copy a Harper database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - ---- - -### Get all available CLI commands - -To display all available Harper CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -### Get the status of Harper and clustering - -To display the status of the Harper process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb status -``` - ---- - -### Backups - -Harper uses a transactional commit process that ensures 
that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down) , and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -## Operations API through the CLI - -Some of the API operations are available through the CLI, this includes most operations that do not require nested parameters. To call the operation use the following convention: ` =`. By default, the result will be formatted as YAML, if you would like the result in JSON pass: `json=true`. 
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` - -### Remote Operations - -The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example: - -```bash -export CLI_TARGET_USERNAME=HDB_ADMIN -export CLI_TARGET_PASSWORD=password -harperdb describe_database database=dev target=https://server.com:9925 -``` - -The same set of operations API are available for remote operations as well. - -#### Remote Component Deployment - -When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. 
This will package the current directory and send it to the target server (also `deploy` is allowed as an alias to `deploy_component`): - -```bash -harperdb deploy target=https://server.com:9925 -``` - -If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter): - -```bash -harperdb restart target=https://server.com:9925 replicated=true -``` diff --git a/versioned_docs/version-4.6/deployments/harper-cloud/alarms.md b/versioned_docs/version-4.6/deployments/harper-cloud/alarms.md deleted file mode 100644 index 372807e5..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | --------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.6/deployments/harper-cloud/index.md b/versioned_docs/version-4.6/deployments/harper-cloud/index.md deleted file mode 100644 index c0785d0d..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Harper Cloud ---- - -# Harper Cloud - -[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper, it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new Harper Cloud instance in the Harper Studio. diff --git a/versioned_docs/version-4.6/deployments/harper-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.6/deployments/harper-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 6ea4c7d2..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.6/deployments/harper-cloud/iops-impact.md b/versioned_docs/version-4.6/deployments/harper-cloud/iops-impact.md deleted file mode 100644 index 18e9f948..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## Harper Cloud Storage - -Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all Harper Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html). - -## Estimating IOPS for Harper Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. 
So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact Harper Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. 
Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to Harper’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.6/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.6/deployments/harper-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index b6104f7c..00000000 --- a/versioned_docs/version-4.6/deployments/harper-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your Harper instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -Harper on Verizon 5G Wavelength brings Harper closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from Harper to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -Harper 5G Wavelength Instance Specs While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## Harper 5G Wavelength Storage - -Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](./iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.6/deployments/install-harper/index.md b/versioned_docs/version-4.6/deployments/install-harper/index.md deleted file mode 100644 index d7ea3cf6..00000000 --- a/versioned_docs/version-4.6/deployments/install-harper/index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Install Harper ---- - -# Install Harper - -## Install Harper - -This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can deploy it to [Harper Fabric](https://fabric.harper.fast) our distributed data application platform service. Harper is a cross-platform database; we recommend Linux for production use. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. Harper can also run on Windows and Mac, for development purposes only. Note: For Windows, we strongly recommend the use of Windows Subsystem for Linux (WSL). - -Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you have installed, you can skip to installing Harper, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always uses latest minor/patch for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start Harper - -Then you can install Harper with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -Harper will automatically start after installation. 
Harper's installation can be configured with numerous options via CLI arguments, for more information visit the [Harper Command Line Interface](./harper-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux). - -## With Docker - -If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container. - -## Offline Install - -If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -## Installation on Less Common Platforms - -Harper comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). 
However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.6/deployments/install-harper/linux.md b/versioned_docs/version-4.6/deployments/install-harper/linux.md deleted file mode 100644 index cc312bac..00000000 --- a/versioned_docs/version-4.6/deployments/install-harper/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to Harper with sudo privileges exists -1. An additional volume for storing Harper files is attached to the Linux instance -1. Traffic to ports 9925 (Harper Operations API) 9926 (Harper Application Interface) and 9932 (Harper Clustering) is permitted - -While you will need to access Harper through port 9925 for the administration through the operations API, and port 9932 for clustering, for higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet. - -For this example, we will use an AWS Ubuntu Server 22.04 LTS m5.large EC2 Instance with an additional General Purpose SSD EBS volume and the default "ubuntu" user account. 
- ---- - -### (Optional) LVM Configuration - -Logical Volume Manager (LVM) can be used to stripe multiple disks together to form a single logical volume. If striping disks together is not a requirement, skip these steps. - -Find disk that already has a partition - -```bash -used_disk=$(lsblk -P -I 259 | grep "nvme.n1.*part" | grep -o "nvme.n1") -``` - -Create array of free disks - -```bash -declare -a free_disks -mapfile -t free_disks < <(lsblk -P -I 259 | grep "nvme.n1.*disk" | grep -o "nvme.n1" | grep -v "$used_disk") -``` - -Get quantity of free disks - -```bash -free_disks_qty=${#free_disks[@]} -``` - -Construct pvcreate command - -```bash -cmd_string="" -for i in "${free_disks[@]}" -do -cmd_string="$cmd_string /dev/$i" -done -``` - -Initialize disks for use by LVM - -```bash -pvcreate_cmd="pvcreate $cmd_string" -sudo $pvcreate_cmd -``` - -Create volume group - -```bash -vgcreate_cmd="vgcreate hdb_vg $cmd_string" -sudo $vgcreate_cmd -``` - -Create logical volume - -```bash -sudo lvcreate -n hdb_lv -i $free_disks_qty -l 100%FREE hdb_vg -``` - -### Configure Data Volume - -Run `lsblk` and note the device name of the additional volume - -```bash -lsblk -``` - -Create an ext4 filesystem on the volume (The below commands assume the device name is nvme1n1. 
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start Harper - -Here is an example of installing Harper with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing Harper with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start Harper on Boot - -Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=Harper - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration). 
diff --git a/versioned_docs/version-4.6/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.6/deployments/upgrade-hdb-instance.md deleted file mode 100644 index 768b9323..00000000 --- a/versioned_docs/version-4.6/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Upgrade a Harper Instance ---- - -# Upgrade a Harper Instance - -This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](./harper-cloud/) will be upgraded by the Harper Cloud team. - -## Upgrading - -Upgrading Harper is a two-step process. First the latest version of Harper must be downloaded from npm, then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of Harper using `npm install -g harperdb`. - - Note `-g` should only be used if you installed Harper globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - Harper will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -Harper supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that Harper is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure Harper is not running: - -```bash -harperdb stop -``` - -Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install Harper globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start Harper - -```bash -harperdb start -``` - ---- - -## Upgrading Nats to Plexus 4.4 - -To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process. - -The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa. 
- -### Enabling Plexus - -To enable Plexus on a node that is already running NATS, you will need to update [two values](./configuration) in the `harperdb-config.yaml` file: - -```yaml -replication: - url: wss://my-cluster-node-1:9925 - hostname: node-1 -``` - -`replication.url` – This should be set to the URL of the current Harper instance. - -`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance. - -### Upgrade Steps - -1. Set up the bridge node: - - Choose one node to be the bridge node. - - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.** - - Stop the instance and perform the upgrade. - - Start the instance. This node should now be running both Plexus and NATS. -1. Upgrade a node: - - Choose a node that needs upgrading and enable Plexus by following the "Enable Plexus" steps. - - Disable NATS by setting `clustering.enabled` to `false`. - - Stop the instance and upgrade it. - - Start the instance. - - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node. _Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._ - -```json -{ - "operation": "add_node", - "hostname:": "node-1", - "url": "wss://my-cluster-node-1:9925" -} -``` - -1. Repeat Step 2 on all remaining nodes that need to be upgraded. -1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance. - -Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes. 
diff --git a/versioned_docs/version-4.6/developers/_category_.json b/versioned_docs/version-4.6/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.6/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.6/developers/applications/caching.md b/versioned_docs/version-4.6/developers/applications/caching.md deleted file mode 100644 index 317f3f6b..00000000 --- a/versioned_docs/version-4.6/developers/applications/caching.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: Caching ---- - -# Caching - -Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). 
You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](./defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). -- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). 
- -#### How `scanInterval` Determines the Eviction Cycle - -`scanInterval` determines fixed clock-aligned times when eviction runs, and these times are the same regardless of when the server started. Harper takes the `scanInterval` and divides the TTL (`expiration` + `eviction`) into evenly spaced “anchor times.” These anchors are calculated in the local timezone of the server. This allows Harper to “snap” the eviction schedule to predictable points on the clock, such as every 15 minutes or every 6 hours, based on the interval length. As a result: - -- The server’s startup time does not affect when eviction runs. -- Eviction timings are deterministic and timezone-aware. -- For any given configuration, the eviction schedule is the same across restarts and across servers in the same local timezone. - -#### Example: 1-Hour Expiration - -`expiration` = 1 hour with default `scanInterval` (15 minutes, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 00:15 -> 00:30 -> 00:45 -> 01:00 -> ... continuing every 15 minutes ... - -If the server starts at 12:05 it does not run eviction at 12:20 or “15 minutes after startup.” Instead, the next scheduled anchor is 12:15, then 12:30, 12:45, 13:00, etc. The schedule is clock-aligned, not startup-aligned. - -#### Example: 1-Day Expiration - -`expiration` = 1 day with default `scanInterval` (6 hours, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 06:00 -> 12:00 -> 18:00 -> ... continuing every 6 hours ... - -If the server starts at 12:05 the next matching eviction time is 18:00 the same day, then 00:00, 06:00, 12:00, 18:00, etc. If the server starts at 19:30 the schedule does not shift. Instead, the next anchor time is 00:00, and the regular 6-hour cycle continues. - -## Define External Data Source - -Next, you need to define the source for your cache. 
External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyCache } = tables; -MyCache.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. 
Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. 
This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). 
- -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - setInterval(() => { // every second retrieve more data - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - }, 1000); - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running on a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](./caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensureLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response.
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - async get() { - const post = await (await fetch(`https://my-blog-server/${this.getId()}`)).json(); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note, that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource.
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache. - - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this is overridden, the method will be called at this time.
- -### Caching Flow with Write-Through - -When writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- Harper will have first created a resource instance to handle the process, and this resource instance will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be recorded in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written to the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/versioned_docs/version-4.6/developers/applications/data-loader.md b/versioned_docs/version-4.6/developers/applications/data-loader.md deleted file mode 100644 index b4059207..00000000 --- a/versioned_docs/version-4.6/developers/applications/data-loader.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: Data Loader ---- - -# Data Loader - -The Data Loader is a built-in component that provides a reliable mechanism for loading data from JSON or YAML files into Harper tables as part of component deployment. This feature is particularly useful for ensuring specific records exist in your database when deploying components, such as seed data, configuration records, or initial application data.
- -## Configuration - -To use the Data Loader, first specify your data files in the `config.yaml` in your component directory: - -```yaml -dataLoader: - files: 'data/*.json' -``` - -The Data Loader is an [Extension](../../reference/components#extensions) and supports the standard `files` configuration option. - -## Data File Format - -Data files can be structured as either JSON or YAML files containing the records you want to load. Each data file must specify records for a single table - if you need to load data into multiple tables, create separate data files for each table. - -### Basic Example - -Create a data file in your component's data directory (one table per file): - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com", - "role": "administrator" - }, - { - "id": 2, - "username": "user1", - "email": "user1@example.com", - "role": "standard" - } - ] -} -``` - -### Multiple Tables - -To load data into multiple tables, create separate data files for each table: - -**users.json:** - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com" - } - ] -} -``` - -**settings.yaml:** - -```yaml -database: myapp -table: settings -records: - - id: 1 - setting_name: app_name - setting_value: My Application - - id: 2 - setting_name: version - setting_value: '1.0.0' -``` - -## File Organization - -You can organize your data files in various ways: - -### Single File Pattern - -```yaml -dataLoader: - files: 'data/seed-data.json' -``` - -### Multiple Files Pattern - -```yaml -dataLoader: - files: - - 'data/users.json' - - 'data/settings.yaml' - - 'data/initial-products.json' -``` - -### Glob Pattern - -```yaml -dataLoader: - files: 'data/**/*.{json,yaml,yml}' -``` - -## Loading Behavior - -When Harper starts up with a component that includes the Data Loader: - -1. 
The Data Loader reads all specified data files (JSON or YAML) -1. For each file, it validates that a single table is specified -1. Records are inserted or updated based on content hash comparison: - - New records are inserted if they don't exist - - Existing records are updated only if the data file content has changed - - User modifications made via Operations API or other methods are preserved - those records won't be overwritten - - Users can add extra fields to data-loader records without blocking future updates to the original fields -1. The Data Loader uses SHA-256 content hashing stored in a system table (`hdb_dataloader_hash`) to track which records it has loaded and detect changes - -### Change Detection - -The Data Loader intelligently handles various scenarios: - -- **New records**: Inserted with their content hash stored -- **Unchanged records**: Skipped (no database writes) -- **Changed data file**: Records are updated using `patch` to preserve any extra fields users may have added -- **User-created records**: Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten -- **User-modified records**: Records modified after being loaded are preserved and not overwritten -- **User-added fields**: Extra fields added to data-loader records are preserved during updates - -This approach ensures data files can be safely reloaded across deployments and node scaling without losing user modifications. - -Note: While the Data Loader can create tables automatically by inferring the schema from the provided records, it's recommended to define your table schemas explicitly using the [graphqlSchema](../applications/defining-schemas) component for better control and type safety. - -## Best Practices - -1. 
**Define Schemas First**: While the Data Loader can infer schemas, it's strongly recommended to define your table schemas and relations explicitly using the [graphqlSchema](../applications/defining-schemas) component before loading data. This ensures proper data types, constraints, and relationships between tables. - -1. **One Table Per File**: Remember that each data file can only load records into a single table. Organize your files accordingly. - -1. **Idempotency**: Design your data files to be idempotent - they should be safe to load multiple times without creating duplicate or conflicting data. - -1. **Version Control**: Include your data files in version control to ensure consistency across deployments. - -1. **Environment-Specific Data**: Consider using different data files for different environments (development, staging, production). - -1. **Data Validation**: Ensure your data files are valid JSON or YAML and match your table schemas before deployment. - -1. **Sensitive Data**: Avoid including sensitive data like passwords or API keys directly in data files. Use environment variables or secure configuration management instead. 
- -## Example Component Structure - -``` -my-component/ -├── config.yaml -├── data/ -│ ├── users.json -│ ├── roles.json -│ └── settings.json -├── schemas.graphql -└── roles.yaml -``` - -With this structure, your `config.yaml` might look like: - -```yaml -# Load environment variables first -loadEnv: - files: '.env' - -# Define schemas -graphqlSchema: - files: 'schemas.graphql' - -# Define roles -roles: - files: 'roles.yaml' - -# Load initial data -dataLoader: - files: 'data/*.json' - -# Enable REST endpoints -rest: true -``` - -## Related Documentation - -- [Built-In Components](../../reference/components/built-in-extensions) -- [Extensions](../../reference/components/extensions) -- [Bulk Operations](../operations-api/bulk-operations) - For loading data via the Operations API diff --git a/versioned_docs/version-4.6/developers/applications/debugging.md b/versioned_docs/version-4.6/developers/applications/debugging.md deleted file mode 100644 index bd9d2622..00000000 --- a/versioned_docs/version-4.6/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. 
- -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration). - -Harper Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. - -## Viewing the Log - -The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page. Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log. 
diff --git a/versioned_docs/version-4.6/developers/applications/define-routes.md b/versioned_docs/version-4.6/developers/applications/define-routes.md deleted file mode 100644 index c442f9f1..00000000 --- a/versioned_docs/version-4.6/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. - -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. 
- -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that.
- -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](./define-routes#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, by passing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](./req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. 
For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.6/developers/applications/defining-roles.md b/versioned_docs/version-4.6/developers/applications/defining-roles.md deleted file mode 100644 index 365aa132..00000000 --- a/versioned_docs/version-4.6/developers/applications/defining-roles.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Defining Application Roles ---- - -# Defining Application Roles - -Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase. - -Let’s walk through creating a role, assigning it, and seeing it in action. - -## Step 1: Declare a Role - -First, point Harper to a roles configuration file. Add this to your `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -Then create a simple `roles.yaml` in your application directory. 
For example, here’s a role that can only read and insert data into the `Dog` table: - -```yaml -dog_reader: - super_user: false - data: - Dog: - read: true - insert: true -``` - -When Harper starts up, it will create this role (or update it if it already exists). - -## Step 2: Create a User for the Role - -Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run): - -```bash -curl -u admin:password -X POST http://localhost:9926 \ - -H "Content-Type: application/json" \ - -d '{ - "operation": "add_user", - "username": "alice", - "password": "password", - "role": "dog_reader" - }' -``` - -Now you have a user named `alice` with the `dog_reader` role. - -## Step 3: Make Requests as Different Users - -Authenticate requests as `alice` to see how her role works: - -```bash -# allowed (insert, role permits insert) -curl -u alice:password -X POST http://localhost:9926/Dog/ \ - -H "Content-Type: application/json" \ - -d '{"name": "Buddy", "breed": "Husky"}' - -# not allowed (delete, role does not permit delete) -curl -u alice:password -X DELETE http://localhost:9926/Dog/1 -``` - -The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`. - -Now compare with a super_user: - -```bash -# super_user can delete -curl -u admin:password -X DELETE http://localhost:9926/Dog/1 -``` - -This succeeds because the super_user role has full permissions. - -## Where to Go Next - -This page gave you the basics - declare a role, assign it, and see it work. - -For more advanced scenarios, including: - -- defining multiple databases per role, -- granting fine-grained attribute-level permissions, -- and the complete structure of `roles.yaml`, - -see the [Roles Reference](../../reference/roles).
diff --git a/versioned_docs/version-4.6/developers/applications/defining-schemas.md b/versioned_docs/version-4.6/developers/applications/defining-schemas.md deleted file mode 100644 index fba870e8..00000000 --- a/versioned_docs/version-4.6/developers/applications/defining-schemas.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in Harper's using GraphQL schema definitions. Schemas definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. The [introduction to applications provides](./) a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allows data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties. For example, some Dog records could also optionally include a `favoriteTrick` property. 
- -In this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. -- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -Database naming: the default "data" database is generally a good default choice for tables in applications that will not be reused in other applications (and don't need to worry about staying in a separate namespace). Application with many tables may wish to organize the tables into separate databases (but remember that transactions do not preserve atomicity across different databases, only across tables in the same database). For components that are designed for re-use, it is recommended that you use a database name that is specific to the component (e.g. "my-component-data") to avoid name collisions with other components. 
- -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoints, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key. Using the `@relationship` directive will define a property as a computed property, which resolves to the an record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship. 
Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resources/) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... -} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define on a reciprocal relationship, from the example above, adding a relationship from brand back to product. 
Here we continue to use the `brandId` attribute from the `Product` schema, and we define a relationship to the `Product` table through the `products` attribute: - -```graphql -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: brandId) -} -``` - -Once this is defined we can use the `products` attribute as a property in our brand instances and allow for querying by `products` and selecting product attributes as returned properties in query results. - -Note that schemas can also reference themselves with relationships, allowing records to define relationships like parent-child relationships between records in the same table. Also note that for a many-to-many relationship, you must not combine the `to` and `from` property in the same relationship directive. - -### Computed Properties: `@computed` - -The `@computed` directive specifies that a field is computed based on other fields in the record. This is useful for creating derived fields that are not stored in the database, but are computed when specific record fields are queried/accessed. The `@computed` directive must be used in combination with a field that is a function that computes the value of the field. For example: - -```graphql -type Product @table { - id: ID @primaryKey - price: Float - taxRate: Float - totalPrice: Float @computed(from: "price + (price * taxRate)") -} -``` - -The `from` argument specifies the expression that computes the value of the field. The expression can reference other fields in the record. The expression is evaluated when the record is queried or indexed. - -The `computed` directive may also be defined in a JavaScript module, which is useful for more complex computations. You can specify a computed attribute, and then define the function with the `setComputedAttribute` method. For example: - -```graphql -type Product @table { -...
- totalPrice: Float @computed -} -``` - -```javascript -tables.Product.setComputedAttribute('totalPrice', (record) => { - return record.price + record.price * record.taxRate; -}); -``` - -Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed(version: 1) @indexed -} -``` - -If you were to update the `setComputedAttribute` function for the `totalPrice` attribute, to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing. - -Note that computed properties will not be included by default in a query result, you must explicitly include them in query results using the `select` query function. 
- -Another example of using a computed custom index, is that we could index all the comma-separated words in a `tags` property by doing (similar techniques are used for full-text indexing): - -```graphql -type Product @table { - id: ID @primaryKey - tags: String # comma delimited set of tags - tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags -} -``` - -For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32-bit, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, that you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys). - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. When an attribute is indexed, Harper will create secondary index from the data in this field for fast/efficient querying using this field. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). 
- -A standard index will index the values in each field, so you can query directly by those values. If the field's value is an array, each of the values in the array will be indexed (you can query by any individual value). - -#### Vector Indexing - -The `@indexed` directive can also specify a `type`. To use vector indexing, you can specify the `type` as `HNSW` for Hierarchical Navigable Small World indexing. This will create a vector index for the attribute. For example: - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW") -} -``` - -HNSW indexing finds the nearest neighbors to a search vector. To use this, you can query with a `sort` parameter, for example: - -```javascript -let results = Product.search({ - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -This can be used in combination with other conditions as well, for example: - -```javascript -let results = Product.search({ - conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }], - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -HNSW supports several additional arguments to the `@indexed` directive to adjust the HNSW parameters: - -- `distance` - Define the distance function. This can be set to 'euclidean' or 'cosine' (uses negative of cosine similarity). The default is cosine. -- `efConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors. A higher value can yield better recall, and a lower value can have better performance. If `efSearchConstruction` is set, this is only applied to indexing. The default is 100. -- `M` - The preferred number of connections at each layer in the HNSW graph. A higher number uses more space but can be helpful when the intrinsic dimensionality of the data is higher. A lower number can be more efficient. The default is 16. 
-- `optimizeRouting` - This uses a heuristic to avoid graph connections that match existing indirect connections (connections through another node). This can yield more efficient graph traversals for the same M setting. This is a number between 0 and 1 and a higher value will more aggressively omit connections with alternate paths. Setting this to 0 will disable route optimizing and follow the traditional HNSW algorithm for creating connections. The default is 0.5. -- `mL` - The normalization factor for level generation, by default this is computed from `M`. -- `efSearchConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors for searching. The default is 50. - -For example: - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100) -} -``` - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). - -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself. - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity.
- -### Field Types - -Harper supports the following field types in addition to user defined (object) types: - -- `String`: String/text -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647) -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992) -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available) -- `BigInt`: Any integer (negative or positive) with less than 300 digits (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately) -- `Boolean`: true or false -- `ID`: A string (but indicates it is not intended to be human readable) -- `Any`: Any primitive, object, or array is allowed -- `Date`: A Date object -- `Bytes`: Binary data as a Buffer or Uint8Array -- `Blob`: Binary data as a [Blob](../../reference/blob), designed for large blocks of data that can be streamed. It is recommended that you use this for binary data that will typically be larger than 20KB. - -#### Renaming Tables - -It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. - -### OpenAPI Specification - -_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints are configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`.
- -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/versioned_docs/version-4.6/developers/applications/index.md b/versioned_docs/version-4.6/developers/applications/index.md deleted file mode 100644 index 804a219d..00000000 --- a/versioned_docs/version-4.6/developers/applications/index.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: Applications ---- - -# Applications - -Harper is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that Harper provides by building a Harper application, a fundamental building-block of the Harper ecosystem. - -When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference. - -Before we get started, let's clarify some terminology that is used throughout the documentation. - -**Components** are the high-level concept for modules that extend the Harper core platform adding additional functionality. The application you will build here is a component. In addition to applications, components also encompass extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. 
For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. - -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -Extensions can also depend on other extensions. For example, the [`@harperdb/apollo`](https://github.com/HarperDB/apollo) extension depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. Applications can then use the `@harperdb/apollo` extension to implement an Apollo GraphQL backend server. 
- -```mermaid -flowchart TD - subgraph Applications - direction TB - NextJSApp["Next.js App"] - ApolloApp["Apollo App"] - CustomResource["Custom Resource"] - end - - subgraph Extensions - direction TB - subgraph Custom - NextjsExt["@harperdb/nextjs"] - ApolloExt["@harperdb/apollo"] - end - subgraph Built-In - GraphqlSchema["graphqlSchema"] - JsResource["jsResource"] - Rest["rest"] - end - end - - subgraph Core - direction TB - Database["database"] - FileSystem["file-system"] - Networking["networking"] - end - - NextJSApp --> NextjsExt - ApolloApp --> ApolloExt - CustomResource --> JsResource & GraphqlSchema & Rest - - NextjsExt --> Networking - NextjsExt --> FileSystem - ApolloExt --> GraphqlSchema - ApolloExt --> Networking - - GraphqlSchema --> Database - JsResource --> Database - Rest --> Networking -``` - -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](../reference/components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -Beyond applications and extensions, components are further classified as built-in or custom. **Built-in** components are included with Harper by default and can be directly referenced by their name. The `graphqlSchema`, `rest`, and `jsResource` extensions used in the previous application example are all examples of built-in extensions. 
**Custom** components must use external references, generally npm or GitHub packages, and are often included as dependencies within the `package.json` of the component. - -> Harper maintains a number of custom components that are available on `npm` and `GitHub`, such as the [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) extension or the [`@harperdb/status-check`](https://github.com/HarperDB/status-check) application. - -Harper does not currently include any built-in applications, making "custom applications" a bit redundant. Generally, we just say "application". However, there is a multitude of both built-in and custom extensions, and so the documentation refers to them as such. A complete list of built-in extensions is available in the [Built-In Extensions](../reference/components/built-in-extensions) documentation page, and the list of custom extensions and applications is available below. - -This guide is going to walk you through building a basic Harper application using a set of built-in extensions. - -> The Reference -> Components section of the documentation contains a [complete reference for all aspects of components](../reference/components), applications, extensions, and more. - -## Custom Functionality with JavaScript - -[The getting started guide](/learn) covers how to build an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. 
In Harper, data is accessed through our [Resource API](../reference/resources/), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog { - static loadAsInstance = false; - async get(target) { - const record = await super.get(target); - return { - ...record, // include all properties from the record - humanAge: 15 + record.age * 5, // silly calculation of human age equivalent - }; - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. 
Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age`, will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. And changing or assigning new properties can be saved or included in the resource as it returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -We use the new table's (static) `get()` method to retrieve a breed by id. Harper will maintain the current context, ensuring that we are accessing the data atomically, in a consistent snapshot across tables. This provides: - -1. Automatic tracking of most recently updated timestamps across resources for caching purposes -2. Sharing of contextual metadata (like user who requested the data) -3. Transactional atomicity for any writes (not needed in this get operation, but important for other operations) - -The resource methods are automatically wrapped with a transaction and will automatically commit the changes when the method finishes. This allows us to fully utilize multiple resources in our current transaction. 
With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -//resource.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - static loadAsInstance = false; - async get(target) { - // get the Dog record - const record = await super.get(target); - // get the Breed record - let breedDescription = await Breed.get(record.breed); - return { - ...record, - breedDescription, - }; - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. the POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array to a specific instance. We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const record = this.update(target); - record.tricks.push(data.trick); - } - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). 
So when you push data onto the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being updated. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post()` method or `put()` method to do this. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const context = this.getContext(); - // if we want to skip the default permission checks, we can turn off checkPermissions: - target.checkPermissions = false; - const record = this.update(target); - // and do our own/custom permission check: - if (record.owner !== context.user?.username) { - throw new Error('Can not update this record'); - } - record.tricks.push(data.trick); - } - } -} -``` - -Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.js -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to a URL like `/` to be directly resolved to this resource.
- -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get(target) { - return (await fetch(`https://best-dog-site.com/${target}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources. - -Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../reference/globals) and the [Resource interface](../reference/resources/). - -## Configuring Applications/Components - -For complete information of configuring applications, refer to the [Component Configuration](../reference/components/configuration) reference page. - -## Define Fastify Routes - -Exporting resource will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. 
See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the Harper's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.6/developers/applications/web-applications.md b/versioned_docs/version-4.6/developers/applications/web-applications.md deleted file mode 100644 index 02fd1893..00000000 --- a/versioned_docs/version-4.6/developers/applications/web-applications.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Web Applications on Harper ---- - -# Web Applications on Harper - -Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed -specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used -with Harper to create web applications with standard best-practice design and development patterns. Running these frameworks -on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading, -caching, and distributed design. - -Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching -allows you to create full-featured web applications with a single platform. 
This eliminates the overhead of legacy solutions that -require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while -allowing the full stack to be deployed to distributed locations with full local response handling, providing an incredibly low latency web experience. - -## Web Application Frameworks - -With built-in caching mechanisms, and an easy-to-use JavaScript API for interacting with data, creating full-featured applications -using popular frameworks is a simple and straightforward process. - -Get started today with one of our examples: - -- [Next.js](https://github.com/HarperDB/nextjs-example) -- [React SSR](https://github.com/HarperDB/react-ssr-example) -- [Vue SSR](https://github.com/HarperDB/vue-ssr-example) -- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example) -- [Solid SSR](https://github.com/HarperDB/solid-ssr-example) - -## Cookie Support - -Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications -using best-practice security patterns, allowing users to login and maintain a session without any credential storage on the client side -that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. For example, this could be a login endpoint in your resources.js file: - -```javascript -export class Login extends Resource { - async post(data) { - const { username, password } = data; - await request.login(username, password); - return { message: 'Logged in!' }; - } -} -``` - -This endpoint can be called from the client side using a standard fetch request, a cookie will be returned, and the session will be maintained by Harper. -This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling. 
- -## Browser Caching Negotiation - -Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return `304 Not Modified` response based on prior `Etag` sent in headers. It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server. - -## Built-in Cross-Origin Resource Sharing (CORS) - -Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you. 
- -## More Resources - -Make sure to check out our developer videos too: - -- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY) -- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc) diff --git a/versioned_docs/version-4.6/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.6/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index c4254430..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created Harper will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." 
-} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least 2 of our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors' names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/analytics.md b/versioned_docs/version-4.6/developers/operations-api/analytics.md deleted file mode 100644 index 470d4066..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/analytics.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: Analytics Operations ---- - -# Analytics Operations - -## get_analytics - -Retrieves analytics data from the server. - -- `operation` _(required)_ - must always be `get_analytics` -- `metric` _(required)_ - any value returned by `list_metrics` -- `start_time` _(optional)_ - Unix timestamp in milliseconds -- `end_time` _(optional)_ - Unix timestamp in milliseconds -- `get_attributes` _(optional)_ - array of attribute names to retrieve -- `conditions` _(optional)_ - array of conditions to filter results (see [search_by_conditions docs](./nosql-operations) for details) - -### Body - -```json -{ - "operation": "get_analytics", - "metric": "resource-usage", - "start_time": 1769198332754, - "end_time": 1769198532754, - "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], - "conditions": [ - { - "attribute": "node", - "operator": "equals", - "value": "node1.example.com" - } - ] -} -``` - -### Response 200 - -```json -[ - { - "id": "12345", - "metric": "resource-usage", - "userCPUTime": 100, - "systemCPUTime": 50 - }, - { - "id": "67890", - "metric": "resource-usage", - "userCPUTime": 150, - "systemCPUTime": 75 - } -] -``` - -## list_metrics - -Returns a list of available metrics that can be queried. 
- -- `operation` _(required)_ - must always be `list_metrics` -- `metric_types` _(optional)_ - array of metric types to filter results; one or both of `custom` and `builtin`; default is `builtin` - -### Body - -```json -{ - "operation": "list_metrics", - "metric_types": ["custom", "builtin"] -} -``` - -### Response 200 - -```json -["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] -``` - -## describe_metric - -Provides detailed information about a specific metric, including its structure and available parameters. - -- `operation` _(required)_ - must always be `describe_metric` -- `metric` _(required)_ - name of the metric to describe - -### Body - -```json -{ - "operation": "describe_metric", - "metric": "resource-usage" -} -``` - -### Response 200 - -```json -{ - "attributes": [ - { - "name": "id", - "type": "number" - }, - { - "name": "metric", - "type": "string" - }, - { - "name": "userCPUTime", - "type": "number" - }, - { - "name": "systemCPUTime", - "type": "number" - }, - { - "name": "node", - "type": "string" - } - ] -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.6/developers/operations-api/bulk-operations.md deleted file mode 100644 index b6714552..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,255 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into Harper - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running Harper - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/certificate-management.md b/versioned_docs/version-4.6/developers/operations-api/certificate-management.md deleted file mode 100644 index f8eea402..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/certificate-management.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Add Certificate - -Adds or updates a certificate in the `hdb_certificate` system table. -If a `private_key` is provided it will **not** be stored in `hdb_certificate`, it will be written to file in `/keys/`. -If a `private_key` is not passed the operation will search for one that matches the certificate. If one is not found an error will be returned. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_certificate` -- `name` _(required)_ - a unique name for the certificate -- `certificate` _(required)_ - a PEM formatted certificate string -- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority -- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for -- `private_key` _(optional)_ - a PEM formatted private key string - -### Body - -```json -{ - "operation": "add_certificate", - "name": "my-cert", - "certificate": "-----BEGIN CERTIFICATE-----ZDFAay... 
-----END CERTIFICATE-----", - "is_authority": false, - "private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... -----END RSA PRIVATE KEY-----" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added certificate: my-cert" -} -``` - ---- - -## Remove Certificate - -Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_certificate` -- `name` _(required)_ - the name of the certificate - -### Body - -```json -{ - "operation": "remove_certificate", - "name": "my-cert" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed my-cert" -} -``` - ---- - -## List Certificates - -Lists all certificates in the `hdb_certificate` system table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_certificates` - -### Body - -```json -{ - "operation": "list_certificates" -} -``` - -### Response: 200 - -```json -[ - { - "name": "HarperDB-Certificate-Authority-node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... S34==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": true, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "serial_number": "5235345", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - }, - { - "name": "node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 
5bv==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": false, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", - "serial_number": "5243646", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - } -] -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/clustering-nats.md b/versioned_docs/version-4.6/developers/operations-api/clustering-nats.md deleted file mode 100644 index fd0b8682..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/clustering-nats.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering using NATS ---- - -# Clustering using NATS - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an objects array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table -- - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `node_name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about [Harper clustering here](../../reference/clustering). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/versioned_docs/version-4.6/developers/operations-api/clustering.md b/versioned_docs/version-4.6/developers/operations-api/clustering.md deleted file mode 100644 index 8fc5ae49..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/clustering.md +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -The following operations are available for configuring and managing [Harper replication](../replication/). 
- -_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](./clustering-nats) _**documentation.**_ - -## Add Node - -Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided a fully replicating system will be created. [Learn more about adding nodes here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add -- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. This will allow the Harper default self-signed certificates to be accepted. Defaults to `true` -- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials -- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used everytime a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate based authorization is much more secure and safe, and avoids the need for storing credentials. Defaults to `false`. -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(optional)_ - The relationship created between nodes. 
If not provided, a fully replicated cluster will be set up.
Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'server-two'" -} -``` - ---- - -## Remove Node - -Removes a Harper node from the cluster and stops replication, [Learn more about remove node here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are removing - -### Body - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'server-two' from cluster" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. - -`database_sockets` shows the actual websocket connections that exist between nodes. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "type": "cluster-status", - "connections": [ - { - "replicateByDefault": true, - "replicates": true, - "url": "wss://server-2.domain.com:9933", - "name": "server-2.domain.com", - "subscriptions": null, - "database_sockets": [ - { - "database": "data", - "connected": true, - "latency": 0.7, - "thread_id": 1, - "nodes": ["server-2.domain.com"], - "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", - "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", - "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", - "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" - } - ] - } - ], - "node_name": "server-1.domain.com", - "is_enabled": true -} -``` - -There is a separate socket for each database for each node. Each node is represented in the connections array, and each database connection to that node is represented in the `database_sockets` array. Additional timing statistics include: - -- `lastCommitConfirmed`: When a commit is sent out, it should receive a confirmation from the remote server; this is the last receipt of confirmation of an outgoing commit. -- `lastReceivedRemoteTime`: This is the timestamp of the transaction that was last received. The timestamp is from when the original transaction occurred. -- `lastReceivedLocalTime`: This is local time when the last transaction was received. If there is a different between this and `lastReceivedRemoteTime`, it means there is a delay from the original transaction to \* receiving it and so it is probably catching-up/behind. -- `sendingMessage`: The timestamp of transaction is actively being sent. This won't exist if the replicator is waiting for the next transaction to send. - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. 
Resets and replaces any existing clustering setup. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object following the `add_node` schema. - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password2" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "subscribe": true, - "publish": false - } - ] - }, - { - "hostname": "server-three", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password3" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Cluster Set Routes - -Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties. - -### Body - -```json -{ - "operation": "cluster_set_routes", - "routes": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets the replication routes from the Harper config file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -[ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } -] -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/components.md b/versioned_docs/version-4.6/developers/operations-api/components.md deleted file mode 100644 index 95c800b1..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/components.md +++ /dev/null @@ -1,546 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a predefined template. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -If deploying with the `payload` option, Harper will decrypt the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory. - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registerd packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. When using tags, we highly recommend that you use the `semver` directive to ensure consistent and reliable installation by NPM. In addition to tags, you can also reference branches or commit numbers. 
Here is an example URL package reference to a (public) Git repository that doesn't require authentication: - -``` -https://github.com/HarperDB/application-template#semver:v1.0.0 -``` - -or this can be shortened to: - -``` -HarperDB/application-template#semver:v1.0.0 -``` - -You can also install from private repository if you have an installed SSH keys on the server: - -``` -git+ssh://git@github.com:my-org/my-app.git#semver:v1.0.0 -``` - -Or you can use a Github token: - -``` -https://@github.com/my-org/my-app#semver:v1.0.0 -``` - -Or you can use a GitLab Project Access Token: - -``` -https://my-project:@gitlab.com/my-group/my-project#semver:v1.0.0 -``` - -Note that your component will be installed by NPM. If your component has dependencies, NPM will attempt to download and install these as well. NPM normally uses the public registry.npmjs.org registry. If you are installing without network access to this, you may wish to define [custom registry locations](https://docs.npmjs.com/cli/v8/configuring-npm/npmrc) if you have any dependencies that need to be installed. NPM will install the deployed component and any dependencies in node_modules in the hdb root directory (typically `~/hdb/node_modules`). - -_Note: After deploying a component a restart may be required_ - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_component` -- `project` _(required)_ - the name of the project you wish to deploy -- `package` _(optional)_ - this can be any valid GitHub or NPM reference -- `payload` _(optional)_ - a base64-encoded string representation of the .tar file. Must be a string -- `restart` _(optional)_ - must be either a boolean or the string `rolling`. If set to `rolling`, a rolling restart will be triggered after the component is deployed, meaning that each node in the cluster will be sequentially restarted (waiting for the last restart to start the next). 
If set to `true`, the restart will not be rolling, all nodes will be restarted in parallel. If `replicated` is `true`, the restart operations will be replicated across the cluster. -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. -- `install_command` _(optional)_ - A command to use when installing the component. Must be a string. This can be used to install dependencies with pnpm or yarn, for example, like: `"install_command": "npm install -g pnpm && pnpm install"` - -### Body - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "payload": "A very large base64-encoded string representation of the .tar file" -} -``` - -```json -{ - "operation": "deploy_component", - "project": "my-component", - "package": "HarperDB/application-template", - "replicated": true -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed: my-component" -} -``` - ---- - -## Package Component - -Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string and the payload. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. 
- -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete -- `replicated` _(optional)_ - if true, Harper will replicate the component deletion to all nodes in the cluster. Must be a boolean. -- `restart` _(optional)_ - if true, Harper will restart after dropping the component. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 
466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. 
Defaults to `utf8` -- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` - ---- - -## Add SSH Key - -Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_ssh_key` -- `name` _(required)_ - the name of the key -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` -- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. 
- _Operation is restricted to super_user roles only_ - -### Body - -```json -{ - "operation": "add_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nfake\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Added ssh key: harperdb-private-component" -} -``` - -### Generated Config and Deploy Component "package" string examples - -``` -#harperdb-private-component -Host harperdb-private-component.github.com - HostName github.com - User git - IdentityFile /hdbroot/ssh/harperdb-private-component.key - IdentitiesOnly yes -``` - -``` -"package": "git+ssh://git@:.git#semver:v1.2.3" - -"package": "git+ssh://git@harperdb-private-component.github.com:HarperDB/harperdb-private-component.git#semver:v1.2.3" -``` - -Note that `deploy_component` with a package uses `npm install` so the url must be a valid npm format url. The above is an example of a url using a tag in the repo to install. - ---- - -## Update SSH Key - -Updates the private key contents of an existing SSH key. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_ssh_key` -- `name` _(required)_ - the name of the key to be updated -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `replicated` _(optional)_ - if true, Harper will replicate the key update to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "update_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Updated ssh key: harperdb-private-component" -} -``` - -## Delete SSH Key - -Deletes a SSH key. This will also remove it from the generated SSH config. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_ssh_key` -- `name` _(required)_ - the name of the key to be deleted -- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "name": "harperdb-private-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Deleted ssh key: harperdb-private-component" -} -``` - ---- - -## List SSH Keys - -List off the names of added SSH keys - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_ssh_keys` - -### Body - -```json -{ - "operation": "list_ssh_keys" -} -``` - -### Response: 200 - -```json -[ - { - "name": "harperdb-private-component" - } -] -``` - -_Note: Additional SSH keys would appear as more objects in this array_ - ---- - -## Set SSH Known Hosts - -Sets the SSH known_hosts file. This will overwrite the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_ssh_known_hosts` -- `known_hosts` _(required)_ - The contents to set the known_hosts to. Line breaks must be delimite d with -- `replicated` _(optional)_ - if true, Harper will replicate the known hosts to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "set_ssh_known_hosts", - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Known hosts successfully set" -} -``` - -## Get SSH Known Hosts - -Gets the contents of the known_hosts file - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_ssh_known_hosts` - -### Body - -```json -{ - "operation": "get_ssh_known_hosts" -} -``` - -### Response: 200 - -```json -{ - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - ---- - -## Install Node Modules - -:::warning Deprecated -This operation is deprecated, as it is handled automatically by [deploy_component](#deploy-component) and [restart](./system-operations#restart). -::: - -Executes npm install against specified custom function projects. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `install_node_modules` -- `projects` _(required)_ - must ba an array of custom functions projects. -- `dry_run` _(optional)_ - refers to the npm --dry-run flag: [https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run](https://docs.npmjs.com/cli/v8/commands/npm-install#dry-run). Defaults to false. - -### Body - -```json -{ - "operation": "install_node_modules", - "projects": ["dogs", "cats"], - "dry_run": true -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/configuration.md b/versioned_docs/version-4.6/developers/operations-api/configuration.md deleted file mode 100644 index 9872da4f..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/configuration.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -## Set Configuration - -Modifies the Harper configuration file parameters. Must follow with a [restart](./system-operations#restart) or [restart_service](./system-operations#restart-service) operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the Harper configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "replication": { - "hostname": "node1", - "databases": "*", - "routes": null, - "url": "wss://127.0.0.1:9925" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - 
"requireAuthentication": true - }, - "operationsApi": { - "network": { - "cors": true, - "corsAccessList": ["*"], - "domainSocket": "/Users/hdb/operations-server", - "port": 9925, - "securePort": null - } - }, - "rootPath": "/Users/hdb", - "storage": { - "writeAsync": false, - "caching": true, - "compression": false, - "noReadAhead": true, - "path": "/Users/hdb/database", - "prefetchWrites": true - }, - "tls": { - "privateKey": "/Users/hdb/keys/privateKey.pem" - } -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/custom-functions.md b/versioned_docs/version-4.6/developers/operations-api/custom-functions.md deleted file mode 100644 index 2c469bf4..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/custom-functions.md +++ /dev/null @@ -1,281 +0,0 @@ ---- -title: Custom Functions ---- - -# Custom Functions - -:::warning Deprecated -These operations are deprecated. -::: - -## Custom Functions Status - -Returns the state of the Custom functions server. This includes whether it is enabled, upon which port it is listening, and where its root project directory is located on the host machine. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `custom_function_status` - -### Body - -```json -{ - "operation": "custom_functions_status" -} -``` - -### Response: 200 - -```json -{ - "is_enabled": true, - "port": 9926, - "directory": "/Users/myuser/hdb/custom_functions" -} -``` - ---- - -## Get Custom Functions - -Returns an array of projects within the Custom Functions root project directory. Each project has details including each of the files in the routes and helpers directories, and the total file count in the static folder. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_functions` - -### Body - -```json -{ - "operation": "get_custom_functions" -} -``` - -### Response: 200 - -```json -{ - "dogs": { - "routes": ["examples"], - "helpers": ["example"], - "static": 3 - } -} -``` - ---- - -## Get Custom Function - -Returns the content of the specified file as text. HarperDStudio uses this call to render the file content in its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to get content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to get content - must be either routes or helpers -- `file` _(required)_ - The name of the file for which you wish to get content - should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "get_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_custom_function_project` -- `project` _(required)_ - the name of the project you wish to create - -### Body - -```json -{ - "operation": "add_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully created custom function project: dogs" -} -``` - ---- - -## Drop Custom Function Project - -Deletes the specified project folder and all of its contents. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function_project` -- `project` _(required)_ - the name of the project you wish to delete - -### Body - -```json -{ - "operation": "drop_custom_function_project", - "project": "dogs" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted project: dogs" -} -``` - ---- - -## Package Custom Function Project - -Creates a .tar file of the specified project folder, then reads it into a base64-encoded string and returns an object with the string, the payload and the file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_custom_function_project` -- `project` _(required)_ - the name of the project you wish to package up for deployment -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean. 
- -### Body - -```json -{ - "operation": "package_custom_function_project", - "project": "dogs", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "dogs", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", - "file": "/tmp/d27f1154-5d82-43f0-a5fb-a3018f366081.tar" -} -``` - ---- - -## Deploy Custom Function Project - -Takes the output of package_custom_function_project, decrypts the base64-encoded string, reconstitutes the .tar file of your project folder, and extracts it to the Custom Functions root project directory. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `deploy_custom_function_project` -- `project` _(required)_ - the name of the project you wish to deploy. Must be a string -- `payload` _(required)_ - a base64-encoded string representation of the .tar file. Must be a string - -### Body - -```json -{ - "operation": "deploy_custom_function_project", - "project": "dogs", - "payload": "A very large base64-encoded string represenation of the .tar file" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deployed project: dogs" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/databases-and-tables.md b/versioned_docs/version-4.6/developers/operations-api/databases-and-tables.md deleted file mode 100644 index 7c17fb4d..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/databases-and-tables.md +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: Databases and Tables ---- - -# Databases and Tables - -## Describe All - -Returns the definitions of all databases and tables within the database. Record counts about 5000 records are estimated, as determining the exact count can be expensive. When the record count is estimated, this is indicated by the inclusion of a confidence interval of `estimated_record_range`. 
If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). - -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. 
- -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. 
The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/index.md b/versioned_docs/version-4.6/developers/operations-api/index.md deleted file mode 100644 index ad44d9de..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/index.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../deployments/configuration#operationsapi), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](./security#basic-auth) or [JWT authentication](./security#jwt-auth). 
For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Clustering with NATS](operations-api/clustering-nats) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [System Operations](operations-api/system-operations) -- [Configuration](operations-api/configuration) -- [Certificate Management](operations-api/certificate-management) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) -- [Analytics](operations-api/analytics) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/jobs.md b/versioned_docs/version-4.6/developers/operations-api/jobs.md deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/jobs.md +++ /dev/null @@ 
-1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. - -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/logs.md b/versioned_docs/version-4.6/developers/operations-api/logs.md deleted file mode 100644 index 52e52740..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/logs.md +++ /dev/null @@ -1,732 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read Harper Log - -Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. 
Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. By default, will maintain `hdb.log` order - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - 
"__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](./logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 
1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 
1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.6/developers/operations-api/nosql-operations.md deleted file mode 100644 index 9db7585a..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,389 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_attribute` - deprecated in favor of `attribute` -- `value` _(required)_ - value you wish to search - wild cards are allowed -- `search_value` - deprecated in favor of `value` -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "attribute": "owner_name", - "value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. 
The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `attribute` _(required)_ - the attribute you wish to search, can be any attribute. - - `search_attribute` - deprecated in favor of `attribute` - - `comparator` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_type` - deprecated in favor of `comparator` - - `value` _(required)_ - case-sensitive value you wish to search. If the `comparator` is `between` then use an array of two values to search between (both inclusive) - - `search_value` - deprecated in favor of `value` - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "attribute": "age", - "comparator": "between", - "value": [5, 8] - }, - { - "attribute": "weight_lbs", - "comparator": "greater_than", - "value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "attribute": "adorable", - "comparator": "equals", - "value": true - }, - { - "attribute": "lovable", - "comparator": "equals", - "value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.6/developers/operations-api/quickstart-examples.md deleted file mode 100644 index a6c8f637..00000000 --- 
a/versioned_docs/version-4.6/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using on the Harper Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/registration.md b/versioned_docs/version-4.6/developers/operations-api/registration.md deleted file mode 100644 index 
28c6a0e9..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/registration.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the Harper instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Get Fingerprint - -Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -Sets the Harper license as generated by Harper License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/sql-operations.md b/versioned_docs/version-4.6/developers/operations-api/sql-operations.md deleted file mode 100644 index 4b7076bb..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. 
The SELECT statement is used to query data from the database. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/system-operations.md b/versioned_docs/version-4.6/developers/operations-api/system-operations.md deleted file mode 100644 index d39e93cb..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/system-operations.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: System Operations ---- - -# System Operations - -## Restart - -Restarts the Harper instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified Harper service. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` -- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Set Status - -Sets a status value that can be used for application-specific status tracking. Status values are stored in memory and are not persisted across restarts. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_status` -- `id` _(required)_ - the key identifier for the status -- `status` _(required)_ - the status value to set (string between 1-512 characters) - -### Body - -```json -{ - "operation": "set_status", - "id": "primary", - "status": "active" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -### Notes - -- The `id` parameter must be one of the allowed status types: 'primary', 'maintenance', or 'availability' -- If no `id` is specified, it defaults to 'primary' -- For 'availability' status, only 'Available' or 'Unavailable' values are accepted -- For other status types, any string value is accepted - ---- - -## Get Status - -Retrieves a status value previously set with the set_status operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_status` -- `id` _(optional)_ - the key identifier for the status to retrieve (defaults to all statuses if not provided) - -### Body - -```json -{ - "operation": "get_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -If no id parameter is provided, all status values will be returned: - -```json -[ - { - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 - }, - { - "id": "maintenance", - "status": "scheduled", - "__createdtime__": 1621364600123, - "__updatedtime__": 1621364600123 - } -] -``` - ---- - -## Clear Status - -Removes a status entry by its ID. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `clear_status` -- `id` _(required)_ - the key identifier for the status to remove - -### Body - -```json -{ - "operation": "clear_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "message": "Status successfully cleared" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/token-authentication.md b/versioned_docs/version-4.6/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. 
- -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. - -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqk
wsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.6/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.6/developers/operations-api/users-and-roles.md deleted file mode 100644 index 91f222b9..00000000 --- a/versioned_docs/version-4.6/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": 
"745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. 
- -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. 
- -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. 
Harper will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your Harper instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.6/developers/real-time.md b/versioned_docs/version-4.6/developers/real-time.md deleted file mode 100644 index 9c5c79e4..00000000 --- a/versioned_docs/version-4.6/developers/real-time.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. 
Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -Harper is a database, not a generic broker, and therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -Harper supports MQTT with its `mqtt` server module and Harper supports MQTT over standard TCP sockets or over WebSockets. 
This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). - -#### Capabilities - -Harper's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In Harper topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". 
- -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -Harper supports QoS 0 and 1 for publishing and subscribing. - -Harper supports multi-level topics, both for subscribing and publishing. Harper also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -#### Events - -JavaScript components can also listen for MQTT events. This is available on the server.mqtt.events object. 
For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do: - -```javascript -server.mqtt.events.on('connected', (session, socket) => { - console.log('client connected with id', session.clientId); -}); -``` - -The following MQTT events are available: - -- `connection` - When a client initially establishes a TCP or WS connection to the server -- `connected` - When a client establishes an authorized MQTT connection -- `auth-failed` - When a client fails to authenticate -- `disconnected` - When a client disconnects from the server - -### Ordering - -Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. Harper is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, Harper does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). 
- -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as the being the latest and retained message (#2 in this case). - -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of a "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. 
On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. 
The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | :heavy_check_mark: | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties retain handling | :heavy_check_mark: | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.6/developers/replication/index.md b/versioned_docs/version-4.6/developers/replication/index.md deleted file mode 100644 index 469ed52e..00000000 --- 
a/versioned_docs/version-4.6/developers/replication/index.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -title: Replication/Clustering ---- - -# Replication/Clustering - -Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops. - -### Replication Overview - -Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe. - -### Replication Configuration - -To connect your nodes, you need to provide hostnames or URLs for the nodes to connect to each other. This can be done via configuration or through operations. To configure replication, you can specify connection information the `replication` section of the [harperdb-config.yaml](../deployments/configuration). Here, you can specify the host name of the current node, and routes to connect to other nodes, for example: - -```yaml -replication: - hostname: server-one - routes: - - server-two - - server-three -``` - -In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. 
Routes to other nodes can also be configured with URLs or ports: - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9933 # URL based route - - hostname: server-three # define a hostname and port - port: 9933 -``` - -You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form, you would also need to provide the necessary credentials for the operation, see the section on securing connections for more details): - -```json -{ - "operation": "add_node", - "hostname": "server-two" -} -``` - -These operations will also dynamically generating certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node. - -Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally). - -By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases: - -```yaml -replication: - databases: - - data - - system -``` - -By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables. 
However, you can also configure replication for individual tables, and disable and exclude replication for specific tables in a database by setting `replicate` to `false` in the table definition: - -```graphql -type LocalTableForNode @table(replicate: false) { - id: ID! - name: String! -} -``` - -You can also control which nodes data is replicated to, and how many nodes data is replicated to. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated to with the [sharding configuration and APIs](replication/sharding). - -By default, replication connects to the secure port 9933. You can configure the replication port in the `replication` section. - -```yaml -replication: - securePort: 9933 -``` - -### Securing Connections - -Harper supports the highest levels of security through public key infrastructure based security and authorization. Depending on your security configuration, you can configure Harper in several different ways to build a connected cluster. - -#### Provide your own certificates - -If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or Digicert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config: - -``` -replication - enableRootCAs: true -``` - -And then just make sure the certificate’s common name (CN) matches the node's hostname. - -#### Setting Up Custom Certificates - -There are two ways to configure Harper with your own certificates: - -1. Use the `add_certificate` operation to upload them. -1. Or, specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file. 
- -If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key. If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.\ -\ -Example configuration: - -```yaml -tls: - certificate: /path/to/certificate.pem - certificateAuthority: /path/to/ca.pem - privateKey: /path/to/privateKey.pem -``` - -With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes. - -You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release. - -To enable the root certificates set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file: - -```yaml -replication: - enableRootCAs: true -``` - -#### Cross-generated certificates - -Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing. 
\ -\ -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection. - -If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process: - -- It creates a certificate signing request (CSR), sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA). -- The signed certificate is stored for future connections between the nodes, ensuring secure communication. - -**Important:** Your credentials are not stored—they are discarded immediately after use. - -You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates. - -Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer. - -#### Revoking Certificates - -Certificates used in replication can be revoked by using the certificate serial number and either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config in `harperdb-config.yaml`. - -To utilize the `revoked_certificates` attribute in the `hdb_nodes` table, you can use the `add_node` or `update_node` operation to add the certificate serial number to the `revoked_certificates` array. For example: - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "revoked_certificates": ["1769F7D6A"] -} -``` - -To utilize the replication route config in `harperdb-config.yaml`, you can add the certificate serial number to the `revokedCertificates` array. 
For example: - -```yaml -replication: - routes: - - hostname: server-three - port: 9930 - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -#### Removing Nodes - -Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. For example: - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -#### Insecure Connection IP-based Authentication - -You can completely disable secure connections and use IP addresses to authenticate nodes with each other. This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication within an insecure port, either by [configuring the operations API](../deployments/configuration) to run on an insecure port or replication to run on an insecure port. And then set up IP-based routes to connect to other nodes: - -```yaml -replication: - port: 9933 - routes: - - 127.0.0.2 - - 127.0.0.3 -``` - -Note that in this example, we are using loop back addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development. - -#### Explicit Subscriptions - -#### Managing Node Connections and Subscriptions in Harper - -By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. But if you want more control, you can manage these connections manually by explicitly subscribing to nodes. This is useful for advanced configurations, testing, or debugging. - -#### Important Notes on Explicit Subscriptions - -If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. 
This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure. - -#### How to Subscribe to Nodes - -To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions. For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node. - -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": false - } - ] -} -``` - -To update an explicit subscription you can use the [`update_node` operation](./operations-api/clustering). - -Here we are updating the subscription to receive transactions on the `dev.my-table` table from the `server-two` node. - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ] -} -``` - -#### Monitoring Replication - -You can monitor the status of replication through the operations API. You can use the [`cluster_status` operation](./operations-api/clustering) to get the status of replication. For example: - -```json -{ - "operation": "cluster_status" -} -``` - -#### Database Initial Synchronization and Resynchronization - -When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. 
If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data. - -You may also specify a `start_time` in the `add_node` to specify that when a database connects, that it should not download the entire database, but only data since a given starting time. - -**Advanced Configuration** - -You can also check the configuration of the replication system, including the current known nodes and certificates, by querying the hdb_nodes and hdb_certificate table: - -```json -{ - "operation": "search_by_value", - "database": "system", - "table": "hdb_nodes", - "attribute": "name", - "value": "*" -} -``` diff --git a/versioned_docs/version-4.6/developers/replication/sharding.md b/versioned_docs/version-4.6/developers/replication/sharding.md deleted file mode 100644 index f22237b8..00000000 --- a/versioned_docs/version-4.6/developers/replication/sharding.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: Sharding ---- - -Harper's replication system supports various levels of replication or sharding. Harper can be configured or set up to replicate to different data to different subsets of nodes. This can be used facilitate horizontally scalability of storage and write performance, while maintaining optimal strategies of data locality and data consistency. When sharding is configured, Harper will replicate data to only a subset of nodes, based on the sharding configuration, and can then retrieve data from the appropriate nodes as needed to fulfill requests for data. - -There are two main ways to setup sharding in Harper. The approach is to use dynamic sharding, where the location or residency of records is determined dynamically based on where the record was written and record data, and records can be dynamically relocated based on where they are accessed. 
This residency information can be specific to each record, and can vary based on the computed residency and where the data is written and accessed. - -The second approach is define specific shards, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed, or content. This approach is more static, but can be more efficient for certain use cases, and means that the location of data can always be predictably determined based on the primary key. - -## Configuration For Dynamic Sharding - -By default, Harper will replicate all data to all nodes. However, replication can easily be configured for "sharding", or storing different data in different locations or nodes. The simplest way to configure sharding and limit replication to improve performance and efficiency is to configure a replication-to count. This will limit the number of nodes that data is replicated to. For example, to specify that writes should replicate to 2 other nodes besides the node that first stored the data, you can set the `replicateTo` to 2 in the `replication` section of the `harperdb-config.yaml` file: - -```yaml -replication: - replicateTo: 2 -``` - -This will ensure that data is replicated to two other nodes, so that each record will be stored on three nodes in total. - -With a sharding configuration (or customization below) in place, requests will for records that don't reside on the server handling requests will automatically be forwarded to the appropriate node. This will be done transparently, so that the client will not need to know where the data is stored. - -## Replication Control with Headers - -With the REST interface, replication levels and destinations can also specified with the `X-Replicate-To` header. This can be used to indicate the number of additional nodes that data should be replicated to, or to specify the nodes that data should be replicated to. 
The `X-Replicate-To` header can be used with the `POST` and `PUT` methods. This header can also specify if the response should wait for confirmation from other nodes, and how many, with the `confirm` parameter. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: 2;confirm=1 - -... -``` - -You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: node1,node2 -``` - -(This can also be used with the `confirm` parameter.) - -## Replication Control with Operations - -Likewise, you can specify replicateTo and confirm parameters in the operation object when using the Harper API. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following operation object: - -```jsonc -{ - "operation": "update", - "schema": "dev", - "table": "MyTable", - "hashValues": [3], - "record": { - "name": "John Doe", - }, - "replicateTo": 2, - "replicatedConfirmation": 1, -} -``` - -or you can specify nodes: - -```jsonc -{ - // ... - "replicateTo": ["node-1", "node-2"], - // ... -} -``` - -## Programmatic Replication Control - -Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. 
For example, you can define a put method: - -```javascript -class MyTable extends tables.MyTable { - put(record) { - const context = this.getContext(); - context.replicateTo = 2; // or an array of node names - context.replicatedConfirmation = 1; - return super.put(record); - } -} -``` - -## Configuration for Static Sharding - -Alternatively, you can configure static sharding, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key. The `shard` is identified by a number. To configure the shard for each node, you can specify the shard number in the `replication`'s `shard` in the configuration: - -```yaml -replication: - shard: 1 -``` - -Alternatively, you can configure the `shard` under the `replication` `routes`. This allows you to assign a specific shard id based on the routing configuration. - -```yaml -replication: - routes: - - hostname: node1 - shard: 1 - - hostname: node2 - shard: 2 -``` - -Or you can specify a `shard` number by including that property in an `add_node` operation or `set_node` operation, to dynamically assign a node to a shard. - -You can then specify shard number in the `setResidency` or `setResidencyById` functions below. - -## Custom Sharding - -You can also define a custom sharding strategy by specifying a function to compute the "residency" or location of where records should be stored and reside. To do this we use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the `id` field, you can use the following code: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 
['node1'] : ['node2']; -}); -``` - -With this approach, the record metadata, which includes the residency information, and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function. - -The `setResidency` function can alternately return a shard number, which will replicate the data to all the nodes in that shard: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 1 : 2; -}); -``` - -### Custom Sharding By Primary Key - -Alternately you can define a custom sharding strategy based on the primary key alone. This allows records to be retrieved without needing access to the record data or metadata. With this approach, data will only be replicated to the nodes specified by the residency function (the record metadata doesn't need to replicated to all nodes). To do this, you can use the `setResidencyById` method, providing a function that will determine the residency or shard of each record based on the primary key. The function you provide will be called with the primary key, and should return a `shard` number or an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the primary key, you can use the following code: - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? 1 : 2; // return shard number -}); -``` - -or - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? ['node1'] : ['node2']; // return array of node hostnames -}); -``` - -### Disabling Cross-Node Access - -Normally sharding allows data to be stored in specific nodes, but still allows access to the data from any node. However, you can also disable cross-node access so that data is only returned if is stored on the node where it is accessed. 
To do this, you can set the `replicateFrom` property on the context of operation to `false`: - -```jsonc -{ - "operation": "search_by_id", - "table": "MyTable", - "ids": [3], - "replicateFrom": false, -} -``` - -Or use a header with the REST API: - -```http -GET /MyTable/3 -X-Replicate-From: none -``` diff --git a/versioned_docs/version-4.6/developers/rest.md b/versioned_docs/version-4.6/developers/rest.md deleted file mode 100644 index ee44325b..00000000 --- a/versioned_docs/version-4.6/developers/rest.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: REST ---- - -# REST - -## REST - -Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](./applications/) and [defining schemas](./applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](./applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). 
-- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -### GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -#### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -##### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -#### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -#### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. Note that this will only work for properties that are declared in the schema. 
- -### PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -#### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`. - -### DELETE - -This can be used to delete a record or records. - -### `DELETE /my-resource/` - -This will delete a record with the given primary key. This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -### `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -### POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are expost\ed as endpoints, this also can be used to create new records. - -#### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. 
Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -### Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. 
For example, if we are looking for a listing date greater than `2017-03-08T09:00:00.000Z` we must escape the colons as `%3A`: - -``` -GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z -``` - -You can also search for attributes that start with a specific string, by using the == comparator and appending a `*` to the attribute value: - -```http -GET /Product/?name==Keyboard* -``` - -**Chained Conditions** - -You can also specify that a range condition must be met for a single attribute value by chaining conditions. This is done by omitting the name in the name-value pair. For example, to find products with a price between 100 and 200, you could write: - -```http -GET /Product/?price=gt=100<=200 -``` - -Chaining can be used to combined `gt` or `ge` with `lt` or `le` to specify a range of values. Currently, no other types of chaining are supported. - -Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters, to ensure operators are passed through without manipulation. - -Here is a full list of the supported FIQL-style operators/comparators: - -- `==`: equal -- `=lt=`: less than -- `=le=`: less than or equal -- `=gt=`: greater than -- `=ge=`: greater than or equal -- `=ne=`, !=: not equal -- `=ct=`: contains the value (for strings) -- `=sw=`, `==*`: starts with the value (for strings) -- `=ew=`: ends with the value (for strings) -- `=`, `===`: strict equality (no type conversion) -- `!==`: strict inequality (no type conversion) - -#### Unions - -Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. 
For example, to return any product with a rating of `5` _or_ a `featured` attribute that is `true`, we could write:
This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. -- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/references records. - -To get a list of product names with a category of software: - -```http -GET /Product/?category=software&select(name) -``` - -#### `limit(start,end)` or `limit(end)` - -This function specifies a limit on the number of records returned, optionally providing a starting offset. - -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -#### `sort(property)`, `sort(+property,-property,...)` - -This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If the multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties. 
- -For example, to sort by product name (in ascending order): - -```http -GET /Product?rating=gt=3&sort(+name) -``` - -To sort by rating in ascending order, then by price in descending order for products with the same rating: - -```http -GET /Product?sort(+rating,-price) -``` - -## Relationships - -Harper supports relationships in its data models, allowing for tables to define a relationship with data from other tables (or even itself) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even with ordered relationships). These relationships are defined in the schema, and then can easily be queried through chained attributes that act as "join" queries, allowing related attributes to referenced in conditions and selected for returned results. - -### Chained Attributes and Joins - -To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. Most importantly, this provides Harper's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema: - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - brandId: ID @indexed - brand: Brand @relationship(from: "brandId") -} -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: "brandId") -} -``` - -And then you could query a product by brand name: - -```http -GET /Product/?brand.name=Microsoft -``` - -This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`. - -The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. 
In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that: - -```http -GET /Brand/?products.name=Keyboard -``` - -This would return any `Brand` with at least one product with a name `"Keyboard"`. Note, that both of these queries are effectively acting as an "INNER JOIN". - -#### Chained/Nested Select - -Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand) -``` - -We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand{name}) -``` - -Or to specify multiple sub-attributes, we can comma delimit them. Note that selects can "join" to another table without any constraint/filter on the related/joined table: - -```http -GET /Product/?name=Keyboard&select(name,brand{name,id}) -``` - -When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand. - -#### Many-to-many Relationships (Array of Foreign Keys) - -Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. 
For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, and each reseller can offer multiple products)
-- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion. - -#### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. 
- -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with extension for the content type. If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -#### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", data: "BEGIN:VCALENDAR\nVERSION:2.0\n... -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form. 
This is also useful for uploading to a specific property:
Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.6/developers/security/certificate-management.md b/versioned_docs/version-4.6/developers/security/certificate-management.md deleted file mode 100644 index 43209e05..00000000 --- a/versioned_docs/version-4.6/developers/security/certificate-management.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for Harper external facing APIs. For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. 
- -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -By default Harper will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable Harper HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set Harper will default to the values in the `tls` section. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### mTLS - -Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section: - -```yaml - -http: - mtls: true - ... 
-tls: - certificateAuthority: ~/hdb/keys/ca.pem - ... -``` - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. - -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.6/developers/security/configuration.md b/versioned_docs/version-4.6/developers/security/configuration.md deleted file mode 100644 index 2dee9d86..00000000 --- a/versioned_docs/version-4.6/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper. 
- -## CORS - -Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -Harper provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. 
Use operation `harperdb restart` from Harper Operations API.** diff --git a/versioned_docs/version-4.6/developers/security/index.md b/versioned_docs/version-4.6/developers/security/index.md deleted file mode 100644 index 723db452..00000000 --- a/versioned_docs/version-4.6/developers/security/index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Security ---- - -# Security - -Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they’re supposed to be able to access. Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -- [JWT Authentication](security/jwt-auth) -- [Basic Authentication](security/basic-auth) -- [mTLS Authentication](security/mtls-auth) -- [Configuration](security/configuration) -- [Users and Roles](security/users-and-roles) diff --git a/versioned_docs/version-4.6/developers/security/jwt-auth.md b/versioned_docs/version-4.6/developers/security/jwt-auth.md deleted file mode 100644 index 832373e4..00000000 --- a/versioned_docs/version-4.6/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -Harper uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. 
- -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokens` operation is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.6/developers/security/mtls-auth.md b/versioned_docs/version-4.6/developers/security/mtls-auth.md deleted file mode 100644 index 375ec927..00000000 --- a/versioned_docs/version-4.6/developers/security/mtls-auth.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -Harper supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. diff --git a/versioned_docs/version-4.6/developers/security/users-and-roles.md b/versioned_docs/version-4.6/developers/security/users-and-roles.md deleted file mode 100644 index 19f829b2..00000000 --- a/versioned_docs/version-4.6/developers/security/users-and-roles.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in Harper - -Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. 
At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within Harper. See full breakdown of operations restricted to only super_user roles [here](#role-based-operation-restrictions). - -- `super_user` - This role provides full access to all operations and methods within a Harper instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. 
- -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. - - _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. - -Example JSON for `add_role` request - -```json -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true - } - ] - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [] - } - } - } - } -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. 
If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles), blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). - -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] -} -``` - -**Important Notes About Table Permissions** - -1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1. 
If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2). - - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. 
- -## Role-Based Operation Restrictions - -The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles. - -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| 
drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | :-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed Harper as `<>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. 
User_b may not have access to the hdb files Harper needs. This also keeps Harper more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. diff --git a/versioned_docs/version-4.6/index.mdx b/versioned_docs/version-4.6/index.mdx deleted file mode 100644 index a46de397..00000000 --- a/versioned_docs/version-4.6/index.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Harper Docs ---- - -import CustomDocCardList from '@site/src/components/CustomDocCardList'; - -# Harper Docs - -:::info - -### Get the Most Out of Harper - -Join our Discord to access expert support, collaborate with Harper’s core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. - -Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. - -## Getting Started - -The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. 
- -## Building with Harper - - diff --git a/versioned_docs/version-4.6/reference/_category_.json b/versioned_docs/version-4.6/reference/_category_.json deleted file mode 100644 index 1a36ae90..00000000 --- a/versioned_docs/version-4.6/reference/_category_.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "link": { - "type": "doc", - "id": "reference/index" - } -} diff --git a/versioned_docs/version-4.6/reference/analytics.md b/versioned_docs/version-4.6/reference/analytics.md deleted file mode 100644 index 61616931..00000000 --- a/versioned_docs/version-4.6/reference/analytics.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -Harper provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the Harper analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. 
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -# Standard Analytics Metrics - -While applications can define their own metrics, Harper provides a set of standard metrics that are tracked for all services: - -## HTTP - -The following metrics are tracked for all HTTP requests: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ------------------ | ------------- | -------------- | ---------------------------------------------- | ------------ | ------------------------------------------------------- | -| `duration` | resource path | request method | `cache-hit` or `cache-miss` if a caching table | milliseconds | Duration of request handler | -| `duration` | route path | request method | fastify-route | milliseconds | | -| `duration` | operation | | operation | milliseconds | | -| `success` | resource path | request method | | % | | -| `success` | route path | request method | fastify-route | % | | -| `success` | operation | | operation | % | | -| `bytes-sent` | resource path | request method | | bytes | | -| `bytes-sent` | route path | request method | fastify-route | bytes | | -| `bytes-sent` | operation | | operation | bytes | | -| `transfer` | resource path | request method | operation | milliseconds | duration of transfer | -| `transfer` | route path | request method | fastify-route | milliseconds | duration of transfer | -| `transfer` | operation | | operation | milliseconds | duration of transfer | -| 
`socket-routed` | | | | % | percentage of sockets that could be immediately routed | -| `tls-handshake` | | | | milliseconds | | -| `tls-reused` | | | | % | percentage of TLS that reuses sessions | -| `cache-hit` | table name | | | % | The percentage of cache hits | -| `cache-resolution` | table name | | | milliseconds | The duration of resolving requests for uncached entries | - -The following are metrics for real-time MQTT connections: -| `metric` | `path` | `method` | `type` | Unit | Description | -|--------------------|---------------|----------------|--------------------------------------------|--------------------------------------------------------|---------------------------------------------------------| -| `mqtt-connections` | | | | count | The number of open direct MQTT connections | -| `ws-connections` | | | | count | number of open WS connections| -| `connection` | `mqtt` | `connect` | | % | percentage of successful direct MQTT connections | -| `connection` | `mqtt` | `disconnect` | | % | percentage of explicit direct MQTT disconnects | -| `connection` | `ws` | `connect` | | % | percentage of successful WS connections | -| `connection` | `ws` | `disconnect` | | % | percentage of explicit WS disconnects | -| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | The number of bytes sent for a given command and topic | - -The following are metrics for replication: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------------------------- | -| `bytes-sent` | node.database | `replication` | `egress` | bytes | The number of bytes sent for replication | -| `bytes-sent` | node.database | `replication` | `blob` | bytes | The number of bytes sent for replication of blobs | -| `bytes-received` | node.database | `replication` | `ingress` | bytes | The number of bytes received for replication | -| `bytes-received` | node.database | `replication` | 
`blob` | bytes | The number of bytes received for replication of blobs | - -The following are general resource usage statistics that are tracked: - -| `metric` | primary attribute(s) | other attribute(s) | Unit | Description | -| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------- | -| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | The size of the database in bytes | -| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage; including idle time, active time, task queue latency, RSS, heap, buffer and external memory usage | -| `resource-usage` | | | various | [See breakout below](#resource-usage) | -| `storage-volume` | `available`, `free`, `size` | `database` | bytes | The size of the storage volume in bytes | -| `table-size` | `size` | `database`, `table` | bytes | The size of the table in bytes | -| `utilization` | | | % | How much of the time the worker was processing requests | - - -`resource-usage` metrics are everything returned by [node:process.resourceUsage()](https://nodejs.org/api/process.html#processresourceusage)[^1] plus the following additional metrics: - -| `metric` | Unit | Description | -| ---------------- | ---- | ----------------------------------------------------- | -| `time` | ms | Current time when metric was recorded (Unix time) | -| `period` | ms | Duration of the metric period | -| `cpuUtilization` | % | CPU utilization percentage (user and system combined) | - -[^1]: The `userCPUTime` and `systemCPUTime` metrics are converted to milliseconds to match the other time-related metrics. 
diff --git a/versioned_docs/version-4.6/reference/architecture.md b/versioned_docs/version-4.6/reference/architecture.md deleted file mode 100644 index 4155d5ff..00000000 --- a/versioned_docs/version-4.6/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -Harper's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. - -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.6/reference/blob.md b/versioned_docs/version-4.6/reference/blob.md deleted file mode 100644 index 57dd7081..00000000 --- a/versioned_docs/version-4.6/reference/blob.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Blob ---- - -# Blob - -Blobs are binary large objects that can be used to store any type of unstructured/binary data and is designed for large content. Blobs support streaming and feature better performance for content larger than about 20KB. Blobs are built off the native JavaScript `Blob` type, and HarperDB extends the native `Blob` type for integrated storage with the database. 
To use blobs, you would generally want to declare a field as a `Blob` type in your schema: - -```graphql -type MyTable { - id: Any! @primaryKey - data: Blob -} -``` - -You can then create a blob which writes the binary data to disk, and can then be included (as a reference) in a record. For example, you can create a record with a blob like: - -```javascript -let blob = createBlob(largeBuffer); -await MyTable.put({ id: 'my-record', data: blob }); -``` - -The `data` attribute in this example is a blob reference, and can be used like any other attribute in the record, but it is stored separately, and the data must be accessed asynchronously. You can retrieve the blob data with the standard `Blob` methods: - -```javascript -let buffer = await blob.bytes(); -``` - -If you are creating a resource method, you can return a `Response` object with a blob as the body: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - let record = super.get(target); - return { - status: 200, - headers: {}, - body: record.data, // record.data is a blob - }; - } -} -``` - -When using the exported REST APIs for your tables, blobs will by default be treated with a UTF-8 encoding and contain text/plain content. - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "Why hello there, world!" - }' -``` - -To store arbitrary binary content (such as audio data) in a blob, using CBOR is recommended when making API requests. This will let you control the contents of the blob precisely. 
- -If you need to use JSON, Base64 encoding your contents can be a great choice, but you'll need to do a bit of work to control the encoding of the underlying blob: - -```typescript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; - - create(target: RequestTarget, record: Partial) { - if (record.data) { - record.data = Buffer.from(record.data, 'base64'); - } - return super.create(target, record); - } -} -``` - -Now you can create records and they'll be encoded appropriately. For example, here's a small .jpg encoded in base64: - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAVgEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAeQAAAHAAAABDAyMjGRAQAHAAAABAECAwCgAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAAABCgAwAEAAAAAQAAABCkBgADAAAAAQAAAAAAAAAAAAD/2wCEAAEBAQEBAQIBAQIDAgICAwQDAwMDBAYEBAQEBAYHBgYGBgYGBwcHBwcHBwcICAgICAgJCQkJCQsLCwsLCwsLCwsBAgICAwMDBQMDBQsIBggLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLC//dAAQAAf/AABEIABAAEAMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APz68CaN8Mp/DWveJviDqE0R0qGIwWsGEaR532J83uwwABXH+MtP8N6Hryad4cvJrm3lgjlX7WES4R2zujcIAvy8YIHQ+1eYeKdAu9VtTNpUvk3aAeWSxCblOVJA4O08jIrR0/R1txDc37m4u0QK8p7tjkgdBmv2zD4apGvUq1KjcXtHTTRWP0nEUqzxcatKbUEkuWy5fN3+Lmvt0tp2t//Z" - }' -``` - -One of the 
important characteristics of blobs is they natively support asynchronous streaming of data. This is important for both creation and retrieval of large data. When we create a blob with `createBlob`, the returned blob will create the storage entry, but the data will be streamed to storage. This means that you can create a blob from a buffer or from a stream. You can also create a record that references a blob before the blob is fully written to storage. For example, you can create a blob from a stream: - -```javascript -let blob = createBlob(stream); -// at this point the blob exists, but the data is still being written to storage -await MyTable.put({ id: 'my-record', data: blob }); -// we now have written a record that references the blob -let record = await MyTable.get('my-record'); -// we now have a record that gives us access to the blob. We can asynchronously access the blob's data or stream the data, and it will be available as blob the stream is written to the blob. -let stream = record.data.stream(); -``` - -This can be powerful functionality for large media content, where content can be streamed into storage as it streamed out in real-time to users as it is received, or even for web content where low latency transmission of data from origin is critical. However, this also means that blobs are _not_ atomic or [ACID](https://en.wikipedia.org/wiki/ACID) compliant; streaming functionality achieves the opposite behavior of ACID/atomic writes that would prevent access to data as it is being written, and wait until data is fully available before a commit. 
Alternately, we can also use the `saveBeforeCommit` flag to indicate that the blob should be fully written to storage before committing a transaction to ensure that the whole blob is available before the transaction commits and writes the record: - -```javascript -let blob = createBlob(stream, { saveBeforeCommit: true }); -// this put will not commit and resolve until the blob is written and then the record is written -await MyTable.put({ id: 'my-record', data: blob }); -``` - -Note that using `saveBeforeCommit` does not necessarily guarantee full ACID compliance. This can be combined with the `flush` flag to provide a stronger guarantee that a blob is flushed to disk before commiting a transaction. However, the error handling below provides a stronger guarantee of proper blob handling when the process of streaming/writing a blob is interrupted and using proper error handling is recommended, instead of relying `saveBeforeCommit`, for the best combination reliability and performance. - -### Error Handling - -Because blobs can be streamed and referenced prior to their completion, there is a chance that an error or interruption could occur while streaming data to the blob (after the record is committed). We can create an error handler for the blob to handle the case of an interrupted blob: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - const record = super.get(target); - let blob = record.data; - blob.on('error', () => { - // if this was a caching table, we may want to invalidate or delete this record: - MyTable.invalidate(target); - // we may want to re-retrieve the blob - }); - return { - status: 200, - headers: {}, - body: blob - }); - } -} -``` - -### Blob `size` - -Blobs that are created from streams may not have the standard `size` property available, because the size may not be known while data is being streamed. Consequently, the `size` property may be undefined until the size is determined. 
You can listen for the `size` event to be notified when the size is available: - -```javascript -let record = await MyTable.get('my-record'); -let blob = record.data; -blob.size; // will be available if it was saved with a known size -let stream = blob.stream(); // start streaming the data -if (blob.size === undefined) { - blob.on('size', (size) => { - // will be called once the size is available - }); -} -``` - -### Blob Coercion - -When a field is defined to use the `Blob` type, any strings or buffers that are assigned to that field in a `put`, `patch`, or `publish`, will automatically be coerced to a `Blob`. This makes it easy to use a `Blob` type even with JSON data that may come HTTP request bodies or MQTT messages, that do not natively support a `Blob` type. - -See the [configuration](../deployments/configuration) documentation for more information on configuring where blob are stored. diff --git a/versioned_docs/version-4.6/reference/clustering/certificate-management.md b/versioned_docs/version-4.6/reference/clustering/certificate-management.md deleted file mode 100644 index 43839a4b..00000000 --- a/versioned_docs/version-4.6/reference/clustering/certificate-management.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. 
Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that Harper generates are stored in your `/keys/`. - -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your Harper cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make Harper start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart Harper. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. 
Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.6/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.6/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 0a8b2a6c..00000000 --- a/versioned_docs/version-4.6/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. 
- -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. - -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. 
- -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.6/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.6/reference/clustering/enabling-clustering.md deleted file mode 100644 index 606bc29c..00000000 --- a/versioned_docs/version-4.6/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! 
-``` diff --git a/versioned_docs/version-4.6/reference/clustering/establishing-routes.md b/versioned_docs/version-4.6/reference/clustering/establishing-routes.md deleted file mode 100644 index 1d4d5ae2..00000000 --- a/versioned_docs/version-4.6/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. 
- -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.6/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. - -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.6/reference/clustering/index.md b/versioned_docs/version-4.6/reference/clustering/index.md deleted file mode 100644 index fddd3851..00000000 --- a/versioned_docs/version-4.6/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: NATS Clustering ---- - -# NATS Clustering - -Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. 
Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. - -Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. -- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. - -Harper simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. 
-- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.6/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.6/reference/clustering/managing-subscriptions.md deleted file mode 100644 index f043c9d1..00000000 --- a/versioned_docs/version-4.6/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Managing subscriptions ---- - -Tables are replicated when the table is designated as replicating and there is subscription between the nodes. -Tables designated as replicating by default, but can be changed by setting `replicate` to `false` in the table definition: - -```graphql -type Product @table(replicate: false) { - id: ID! - name: String! -} -``` - -Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases -should be replicated by default: - -```yaml -replication: - databases: data -``` - -If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate: - -```graphql -type Product @table(replicate: true) { - id: ID! - name: String! -} -``` - -Reading hdb*nodes (what we do \_to* the node, not what the node does). - -The subscription can be set to publish, subscribe, or both. - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. 
- -_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `set_node_replication`. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "data", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "database": "data", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/update while the others will be left untouched. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. - -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. 
This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. - -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. 
- -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.6/reference/clustering/naming-a-node.md b/versioned_docs/version-4.6/reference/clustering/naming-a-node.md deleted file mode 100644 index 7a512efb..00000000 --- a/versioned_docs/version-4.6/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. 
- -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.6/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.6/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 22bc3977..00000000 --- a/versioned_docs/version-4.6/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of Harper running. - -\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster. diff --git a/versioned_docs/version-4.6/reference/clustering/subscription-overview.md b/versioned_docs/version-4.6/reference/clustering/subscription-overview.md deleted file mode 100644 index b4827de7..00000000 --- a/versioned_docs/version-4.6/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. 
_Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.6/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.6/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.6/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.6/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. 
diff --git a/versioned_docs/version-4.6/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.6/reference/clustering/things-worth-knowing.md deleted file mode 100644 index f523c7bf..00000000 --- a/versioned_docs/version-4.6/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -Harper clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. 
- -![](/img/v4.6/clustering/figure6.png) diff --git a/versioned_docs/version-4.6/reference/components/applications.md b/versioned_docs/version-4.6/reference/components/applications.md deleted file mode 100644 index 2af170bf..00000000 --- a/versioned_docs/version-4.6/reference/components/applications.md +++ /dev/null @@ -1,184 +0,0 @@ ---- -title: Applications ---- - -# Applications - -> The contents of this page predominantly relate to **application** components. Extensions are not necessarily _deployable_. The ambiguity of the term "components" is being worked on and will be improved in future releases. As we work to clarify the terminology, please keep in mind that the component operations are synonymous with application management. In general, "components" is the general term for both applications and extensions, but in context of the operations API it refers to applications only. - -Harper offers several approaches to managing applications that differ between local development and Harper managed instances. This page will cover the recommended methods of developing, installing, deploying, and running Harper applications. - -## Local Development - -Harper is designed to be simple to run locally. Generally, Harper should be installed locally on a machine using a global package manager install (i.e. `npm i -g harperdb`). - -> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/). - -When developing an application locally there are a number of ways to run it on Harper. - -### `dev` and `run` commands - -The quickest way to run an application is by using the `dev` command within the application directory. - -The `harperdb dev .` command will automatically watch for file changes within the application directory and restart the Harper threads when changes are detected. 
- -The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread. - -Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process. - -### Deploying to a local Harper instance - -Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead. - -1. Start up Harper with `harperdb` -1. _Deploy_ the application to the local instance by executing: - - ```sh - harperdb deploy \ - project= \ - package= \ - restart=true - ``` - - - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally - - The `package=` option creates a symlink to the application simplifying restarts - - By default, the `deploy` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly - - The `restart=true` option automatically restarts Harper threads after the application is deployed - - If set to `'rolling'`, a rolling restart will be triggered after the application is deployed - -1. In another terminal, use the `harperdb restart` command to restart the instance's threads at any time - - With `package=`, the application source is symlinked so changes will automatically be picked up between restarts - - If `package` was omitted, run the `deploy` command again with any new changes -1. To remove the application use `harperdb drop_component project=` - -Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the application deployed). Upon Harper startup, the application will automatically be loaded and executed across all threads. - -> Not all [component operations](../../developers/operations-api/components) are available via CLI. 
When in doubt, switch to using the Operations API via network requests to the local Harper instance. - -For example, to properly _deploy_ a `test-application` locally, the command would look like: - -```sh -harperdb deploy \ - project=test-application \ - package=/Users/dev/test-application \ - restart=true -``` - -> If the current directory is the application directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path. - -Keep in mind that using a local file path for `package` will only work locally; deploying to a remote instance requires a different approach. - -## Remote Management - -Managing applications on a remote Harper instance is best accomplished through [component operations](../../developers/operations-api/components), similar to using the `deploy` command locally. Before continuing, always backup critical Harper instances. Managing, deploying, and executing applications can directly impact a live system. - -Remote Harper instances work very similarly to local Harper instances. The primary application management operations still include `deploy_component`, `drop_component`, and `restart`. - -The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments. 
- -All together: - -```sh -harperdb deploy \ - project= \ - package= \ - username= \ - password= \ - target= \ - restart=true \ - replicated=true -``` - -Or, using environment variables: - -```sh -export CLI_TARGET_USERNAME= -export CLI_TARGET_PASSWORD= -harperdb deploy \ - project= \ - package= \ - target= \ - restart=true \ - replicated=true -``` - -Unlike local development where `package` should be set to a local file path for symlinking and improved development experience purposes, now it has some additional options. - -A local application can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation. - -Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies). - -- For applications deployed to npm, specify the package name: `package="@harperdb/status-check"` -- For applications on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check` -- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-applications.git"` - - Reference the [SSH Key](../../developers/operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance -- Even tarball URLs are supported: `package="https://example.com/application.tar.gz"` - -> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers. - -These `package` values are all supported because behind-the-scenes, Harper is generating a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. 
This is why symlinks are generated when specifying a file path locally. The following [Advanced](#advanced) section explores this pattern in more detail. - -Finally, don't forget to include `restart=true`, or run `harperdb restart target=`. - -## Advanced - -The following methods are advanced and should be executed with caution as they can have unintended side-effects. Always backup any critical Harper instances before continuing. - -First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field. - -> For a useful shortcut on POSIX compliant machines run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"/g'` - -This path is the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path will be `/components` by default (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path. - -### Adding components to root - -Similar to how components can specify other components within their `config.yaml`, applications can be added to Harper by adding them to the `harperdb-config.yaml`. - -The configuration is very similar to that of `config.yaml`. Entries are comprised of a top-level `:`, and an indented `package: ` field. Any additional component options can also be included as indented fields. - -```yaml -status-check: - package: '@harperdb/status-check' -``` - -The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. 
Thus, the `package: ` can be any valid dependency syntax such as npm packages, GitHub repos, tarballs, and local directories are all supported. - -Given a root config like: - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from npm -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -Harper will generate a `package.json` like: - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -npm will install all the components and store them in ``. A symlink back to `/node_modules` is also created for dependency resolution purposes. - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -By specifying a file path, npm will generate a symlink and then changes will be automatically picked up between restarts. diff --git a/versioned_docs/version-4.6/reference/components/built-in-extensions.md b/versioned_docs/version-4.6/reference/components/built-in-extensions.md deleted file mode 100644 index 49ec5fcb..00000000 --- a/versioned_docs/version-4.6/reference/components/built-in-extensions.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -title: Built-In Extensions ---- - -# Built-In Extensions - -Harper provides extended features using built-in extensions. They do **not** need to be installed with a package manager, and simply must be specified in a config to run. These are used throughout many Harper docs, guides, and examples. 
Unlike custom extensions which have their own semantic versions, built-in extensions follow Harper's semantic version. - -For more information read the [Components, Applications, and Extensions](../../developers/applications/) documentation section. - -- [Built-In Extensions](#built-in-extensions) - - [fastifyRoutes](#fastifyroutes) - - [graphql](#graphql) - - [graphqlSchema](#graphqlschema) - - [jsResource](#jsresource) - - [loadEnv](#loadenv) - - [rest](#rest) - - [roles](#roles) - - [static](#static) - -## dataLoader - -Load data from JSON or YAML files into Harper tables as part of component deployment. - -This component is an [Extension](..#extensions) and can be configured with the `files` configuration option. - -Complete documentation for this feature is available here: [Data Loader](../../developers/applications/data-loader) - -```yaml -dataLoader: - files: 'data/*.json' -``` - -## fastifyRoutes - -Specify custom endpoints using [Fastify](https://fastify.dev/). - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Define Fastify Routes](../../developers/applications/define-routes) - -```yaml -fastifyRoutes: - files: 'routes/*.js' -``` - -## graphql - -> GraphQL querying is **experimental**, and only partially implements the GraphQL Over HTTP / GraphQL specifications. - -Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification. - -Complete documentation for this feature is available here: [GraphQL](../graphql) - -```yaml -graphql: true -``` - -## graphqlSchema - -Specify schemas for Harper tables and resources via GraphQL schema syntax. 
- -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Schemas](../../developers/applications/defining-schemas) - -```yaml -graphqlSchema: - files: 'schemas.graphql' -``` - -## jsResource - -Specify custom, JavaScript based Harper resources. - -Refer to the Application [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) guide, or [Resource Class](../resources/) reference documentation for more information on custom resources. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -```yaml -jsResource: - files: 'resource.js' -``` - -## loadEnv - -Load environment variables via files like `.env`. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Ensure this component is specified first in `config.yaml` so that environment variables are loaded prior to loading any other components. - -```yaml -loadEnv: - files: '.env' -``` - -This component matches the default behavior of dotenv where existing variables take precedence. Specify the `override` option in order to override existing environment variables assigned to `process.env`: - -```yaml -loadEnv: - files: '.env' - override: true -``` - -> Important: Harper is a single process application. Environment variables are loaded onto `process.env` and will be shared throughout all Harper components. This means environment variables loaded by one component will be available on other components (as long as the components are loaded in the correct order). 
- - - - - - - - - -## rest - -Enable automatic REST endpoint generation for exported resources with this component. - -Complete documentation for this feature is available here: [REST](../../developers/rest) - -```yaml -rest: true -``` - -This component contains additional options: - -To enable `Last-Modified` header support: - -```yaml -rest: - lastModified: true -``` - -To disable automatic WebSocket support: - -```yaml -rest: - webSocket: false -``` - -## roles - -Specify roles for Harper tables and resources. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Roles](../../developers/applications/defining-roles) - -```yaml -roles: - files: 'roles.yaml' -``` - -## static - -Specify files to serve statically from the Harper HTTP endpoint. - -Use the [Resource Extension](./extensions#resource-extension) configuration options [`files` and `urlPath`](./extensions#resource-extension-configuration) to specify the files to be served. - -As specified by Harper's Resource Extension docs, the `files` option can be any glob pattern or a glob options object. This extension will serve all files matching the pattern, so make sure to be specific. - -To serve the entire `web` directory, specify `files: 'web/**'`. - -To serve only the html files within `web`, specify `files: 'web/*.html'` or `files: 'web/**/*.html'`. - -The `urlPath` option is the base URL path entries will be resolved to. For example, a `urlPath: 'static'` will serve all files resolved from `files` to the URL path `localhost/static/`. 
- -Given the `config.yaml`: - -```yaml -static: - files: 'web/*.html' - urlPath: 'static' -``` - -And the file directory structure: - -``` -component/ -├─ web/ -│ ├─ index.html -│ ├─ blog.html -├─ config.yaml - -``` - -The HTML files will be available at `localhost/static/index.html` and `localhost/static/blog.html` respectively. diff --git a/versioned_docs/version-4.6/reference/components/configuration.md b/versioned_docs/version-4.6/reference/components/configuration.md deleted file mode 100644 index 2175a03d..00000000 --- a/versioned_docs/version-4.6/reference/components/configuration.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Component Configuration ---- - -# Component Configuration - -> For information on the distinction between the types of components (applications and extensions), refer to beginning of the [Applications](../../developers/applications) documentation section. - -Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how an components configures other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it. - -```yaml -name: - option-1: value - option-2: value -``` - -It is the entry's `name` that is used for component resolution. It can be one of the [built-in extensions](./built-in-extensions), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples. - -For some built-in extensions they can be configured with as little as a top-level boolean; for example, the [rest](./built-in-extensions#rest) extension can be enabled with just: - -```yaml -rest: true -``` - -Most components generally have more configuration options. 
Some options are ubiquitous to the Harper platform, such as the `files` and `urlPath` options for an [extension](./extensions) or [plugin](./plugins), or `package` for any [custom component](#custom-component-configuration). - -[Extensions](./extensions) and [plugins](./plugins) require specifying the `extensionModule` or `pluginModule` option respectively. Refer to their respective API reference documentation for more information. - -## Custom Component Configuration - -Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`: - -```json -{ - "dependencies": { - "@harperdb/nextjs": "1.0.0" - } -} -``` - -Then, within `config.yaml` it can be enabled and configured using: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - # ... -``` - -Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`: - -```json -{ - "dependencies": { - "harper-nextjs-test-feature": "HarperDB/nextjs#test-feature" - } -} -``` - -And now in `config.yaml`: - -```yaml -harper-nextjs-test-feature: - package: '@harperdb/nextjs' - files: './' - # ... -``` - -## Default Component Configuration - -Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components. - -```yaml -rest: true -graphqlSchema: - files: '*.graphql' -roles: - files: 'roles.yaml' -jsResource: - files: 'resources.js' -fastifyRoutes: - files: 'routes/*.js' - urlPath: '.' -static: - files: 'web/**' -``` - -Refer to the [built-in components](./built-in-extensions) documentation for more information on these fields. 
- -If a `config.yaml` is defined, it will **not** be merged with the default config. diff --git a/versioned_docs/version-4.6/reference/components/extensions.md b/versioned_docs/version-4.6/reference/components/extensions.md deleted file mode 100644 index 78012b7b..00000000 --- a/versioned_docs/version-4.6/reference/components/extensions.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -title: Extensions API ---- - -# Extensions API - -> As of Harper v4.6, a new iteration of the extension API was released called **Plugins**. They are simultaneously a simplification and an extensibility upgrade. Plugins are **experimental**, but we encourage developers to consider developing with the [plugin API](./plugins) instead of the extension API. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -There are two key types of Extensions: **Resource Extension** and **Protocol Extensions**. The key difference is a **Protocol Extensions** can return a **Resource Extension**. - -Furthermore, what defines an extension separately from a component is that it leverages any of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs. - -All extensions must define a `config.yaml` file and declare an `extensionModule` option. This must be a path to the extension module source code. The path must resolve from the root of the module directory. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`. - -If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `extensionModule: ./dist/index.js`) - -## Resource Extension - -A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in-extensions#jsresource) extension handles executing JavaScript files. 
- -Resource Extensions are comprised of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-absolutepath-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-absolutepath-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence. - -> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shutdown and turned back on. - -Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and `handleDirectory()` and `setupDirectory()` methods have identical function definitions (arguments and return value behavior). - -### Resource Extension Configuration - -Any [Resource Extension](#resource-extension) can be configured with the `files` and `urlPath` options. These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods. - -> Harper relies on the [fast-glob](https://github.com/mrmlnc/fast-glob) library for glob pattern matching. 
- -- `files` - `string | string[] | Object` - _required_ - A [glob pattern](https://github.com/mrmlnc/fast-glob?tab=readme-ov-file#pattern-syntax) string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the extension. If specified as an object, the `source` property is required. By default, Harper **matches files and directories**; this is configurable using the `only` option. - - `source` - `string | string[]` - _required_ - The glob pattern string or array of strings. - - `only` - `'all' | 'files' | 'directories'` - _optional_ - The glob pattern will match only the specified entry type. Defaults to `'all'`. - - `ignore` - `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - - If the value starts with `./`, such as `'./static/'`, the component name will be included in the base url path - - If the value is `.`, then the component name will be the base url path - - Note: `..` is an invalid pattern and will result in an error - - Otherwise, the value here will be base url path. Leading and trailing `/` characters will be handled automatically (`/static/`, `/static`, and `static/` are all equivalent to `static`) - -For example, to configure the [static](./built-in-extensions#static) component to serve all HTML files from the `web` source directory on the `static` URL endpoint: - -```yaml -static: - files: 'web/*.html' - urlPath: 'static' -``` - -If there are files such as `web/index.html` and `web/blog.html`, they would be available at `localhost/static/index.html` and `localhost/static/blog.html` respectively. 
- -Furthermore, if the component is located in the `test-component` directory, and the `urlPath` was set to `'./static/'` instead, then the files would be served from `localhost/test-component/static/*` instead. - -The `urlPath` is optional, for example to configure the [graphqlSchema](./built-in-extensions#graphqlschema) component to load all schemas within the `src/schema` directory, only specifying a `files` glob pattern is required: - -```yaml -graphqlSchema: - files: 'src/schema/*.schema' -``` - -The `files` option also supports a more complex options object. These additional fields enable finer control of the glob pattern matching. - -For example, to match files within `web`, and omit any within the `web/images` directory, the configuration could be: - -```yaml -static: - files: - source: 'web/**/*' - ignore: ['web/images'] -``` - -In order to match only files: - -```yaml -test-component: - files: - source: 'dir/**/*' - only: 'files' -``` - -### Resource Extension API - -In order for an extension to be classified as a Resource Extension it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. For example: - -```js -// ESM -export function handleFile() {} -export function setupDirectory() {} - -// or CJS -function handleDirectory() {} -function setupFile() {} - -module.exports = { handleDirectory, setupFile }; -``` - -When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead: - -```js -export function start() { - return { - handleFile() {}, - }; -} -``` - -#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise` - -#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise` - -These methods are for processing individual files. They can be async. - -> Remember! 
-> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `contents` - `Buffer` - The contents of the file -- `urlPath` - `string` - The recommended URL path of the file -- `absolutePath` - `string` - The absolute path of the file - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `void | Promise` - -#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise` - -#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise` - -These methods are for processing directories. They can be async. - -If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed. - -> Remember! -> -> `setupFile()` is executed **once** on the **main thread** during the main start sequence. -> -> `handleFile()` is executed on **worker threads** and is executed again during restarts. - -Parameters: - -- `urlPath` - `string` - The recommended URL path of the directory -- `absolutePath` - `string` - The absolute path of the directory - -- `resources` - `Object` - A collection of the currently loaded resources - -Returns: `boolean | void | Promise` - -## Protocol Extension - -A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../globals#server) global API documentation for more information). 
- -### Protocol Extension Configuration - -In addition to the `files` and `urlPath` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. Any options added to the extension configuration (in `config.yaml`), will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods. - -For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple option that can be included in its configuration. For example, a Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - files: './' - prebuilt: true - dev: false -``` - -Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../globals#server) global APIs accept `port` and `securePort` options, so components replicated this for simpler pass-through. - -### Protocol Extension API - -A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_, and _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods have identical `options` object parameter, and can both return a Resource Extension (i.e. an object containing one or more of the methods listed above). 
- -#### `start(options): ResourceExtension | Promise` - -#### `startOnMainThread(options): ResourceExtension | Promise` - -Parameters: - -- `options` - `Object` - An object representation of the extension's configuration options. - -Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api) diff --git a/versioned_docs/version-4.6/reference/components/index.md b/versioned_docs/version-4.6/reference/components/index.md deleted file mode 100644 index 30ce276d..00000000 --- a/versioned_docs/version-4.6/reference/components/index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Components ---- - -# Components - -**Components** are the high-level concept for modules that extend the Harper core platform adding additional functionality. Components encapsulate both applications and extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. - -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. 
- -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -For more information on the differences between applications and extensions, refer to the beginning of the [Applications](../developers/applications/) guide documentation section. - -This technical reference section has detailed information on various component systems: - -- [Built-In Extensions](components/built-in-extensions) -- [Configuration](components/configuration) -- [Managing Applications](components/applications) -- [Extensions](components/extensions) -- [(Experimental) Plugins](components/plugins) - -## Custom Applications - -- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) -- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) -- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) - -## Custom Extensions - -- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) -- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) -- [`@harperdb/astro`](https://github.com/HarperDB/astro) diff --git a/versioned_docs/version-4.6/reference/components/plugins.md b/versioned_docs/version-4.6/reference/components/plugins.md deleted file mode 100644 index 51f33ffa..00000000 --- 
a/versioned_docs/version-4.6/reference/components/plugins.md +++ /dev/null @@ -1,619 +0,0 @@ ---- -title: Experimental Plugins ---- - -# Experimental Plugins - -The new, experimental **plugin** API is an iteration of the existing extension system. It simplifies the API by removing the need for multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, etc.) and instead only requires a single `handleApplication` method. Plugins are designed to be more extensible and easier to use, and they are intended to replace the concept of extensions in the future. - -Similar to the existing extension API, a plugin must specify an `pluginModule` option within `config.yaml`. This must be a path to the plugin module source code. The path must resolve from the root of the module directory. For example: `pluginModule: plugin.js`. - -If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `pluginModule: ./dist/index.js`) - -It is also recommended that all extensions have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since plugins are just JavaScript packages, they can do anything a JavaScript package can normally do. It can be written in TypeScript, and compiled to JavaScript. It can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). It can be published to npm. The possibilities are endless! - -The key to a plugin is the [`handleApplication()`](#function-handleapplicationscope-scope-void--promisevoid) method. It must be exported by the `pluginModule`, and cannot coexist with any of the other extension methods such as `start`, `handleFile`, etc. The component loader will throw an error if both are defined. - -The `handleApplication()` method is executed **sequentially** across all **worker threads** during the component loading sequence. 
It receives a single, `scope` argument that contains all of the relevant metadata and APIs for interacting with the associated component. - -The method can be async and it is awaited by the component loader. - -However, it is highly recommended to avoid event-loop-blocking operations within the `handleApplication()` method. See the examples section for best practices on how to use the `scope` argument effectively. - -## Configuration - -As plugins are meant to be used by applications in order to implement some feature, many plugins provide a variety of configuration options to customize their behavior. Some plugins even require certain configuration options to be set in order to function properly. - -As a brief overview, the general configuration options available for plugins are: - -- `files` - `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - _optional_ - A glob pattern string or array of strings that specifies the files and directories to be handled by the plugin's default `EntryHandler` instance. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries handled by the plugin's default `EntryHandler` instance. -- `timeout` - `number` - _optional_ - The timeout in milliseconds for the plugin's operations. If not specified, the system default is **30 seconds**. Plugins may override the system default themselves, but this configuration option is the highest priority and takes precedence. - -### File Entries - -Just like extensions, plugins support the `files` and `urlPath` options for file entry matching. The values specified for these options are used for the default `EntryHandler` instance created by the `scope.handleEntry()` method. As the reference documentation details, similar options can be used to create custom `EntryHandler` instances too. - -The `files` option can be a glob pattern string, an array of glob pattern strings, or a more expressive glob options object. 
- -- The patterns **cannot** contain `..` or start with `/`. -- The pattern `.` or `./` is transformed into `**/*` automatically. -- Often, it is best to omit a leading `.` or `./` in the glob pattern. - -The `urlPath` option is a base URL path that is prepended to the resolved `files` entries. - -- It **cannot** contain `..`. -- If it starts with `./` or is just `.`, the name of the plugin will be automatically prepended to it. - -Putting this all together, to configure the [static](./built-in-extensions#static) built-in extension to serve files from the `web` directory but at the `/static/` path, the `config.yaml` would look like this: - -```yaml -static: - files: 'web/**/*' - urlPath: '/static/' -``` - -Keep in mind the `urlPath` option is completely optional. - -As another example, to configure the [graphqlSchema](./built-in-extensions#graphqlschema) built-in extension to serve only `*.graphql` files from within the top-level of the `src/schema` directory, the `config.yaml` would look like this: - -```yaml -graphqlSchema: - files: 'src/schema/*.graphql' -``` - -As detailed, the `files` option also supports a more complex object syntax for advanced use cases. - -For example, to match files within the `web` directory, and omit any within `web/images`, you can use a configuration such as: - -```yaml -static: - files: - source: 'web/**/*' - ignore: 'web/images/**' -``` - -> If you're transitioning from the [extension](./extensions) system, the `files` option object no longer supports an `only` field. Instead, use the `entryEvent.entryType` or the specific `entryEvent.eventType` fields in [`onEntryEventHandler(entryEvent)`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) method or any of the specific [`EntryHandler`](#class-entryhandler) events. - -### Timeouts - -The default timeout for all plugins is **30 seconds**. 
If the method does not complete within this time, the component loader will throw an error and unblock the component loading sequence. This is to prevent the component loader from hanging indefinitely if a plugin fails to respond or takes too long to execute. - -The plugin module can export a `defaultTimeout` variable (in milliseconds) that will override the system default. - -For example: - -```typescript -export const defaultTimeout = 60_000; // 60 seconds -``` - -Additionally, users can specify a `timeout` option in their application's `config.yaml` file for a specific plugin. This option takes precedence over the plugin's `defaultTimeout` and the system default. - -For example: - -```yaml -customPlugin: - package: '@harperdb/custom-plugin' - files: 'foo.js' - timeout: 45_000 # 45 seconds -``` - -## Example: Statically hosting files - -This is a functional example of how the `handleApplication()` method and `scope` argument can be used to create a simple static file server plugin. This example assumes that the component has a `config.yaml` with the `files` option set to a glob pattern that matches the files to be served. - -> This is a simplified form of the [static](./built-in-extensions#static) built-in extension. - -```js -export function handleApplication(scope) { - const staticFiles = new Map(); - - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files' || key[0] === 'urlPath') { - // If the files or urlPath options change, we need to reinitialize the static files map - staticFiles.clear(); - logger.info(`Static files reinitialized due to change in ${key.join('.')}`); - } - }); - - scope.handleEntry((entry) => { - if (entry.entryType === 'directory') { - logger.info(`Cannot serve directories. 
Update the files option to only match files.`); - return; - } - - switch (entry.eventType) { - case 'add': - case 'change': - // Store / Update the file contents in memory for serving - staticFiles.set(entry.urlPath, entry.contents); - break; - case 'unlink': - // Remove the file from memory when it is deleted - staticFiles.delete(entry.urlPath); - break; - } - }); - - scope.server.http( - (req, next) => { - if (req.method !== 'GET') return next(req); - - // Attempt to retrieve the requested static file from memory - const staticFile = staticFiles.get(req.pathname); - - return staticFile - ? { - statusCode: 200, - body: staticFile, - } - : { - statusCode: 404, - body: 'File not found', - }; - }, - { runFirst: true } - ); -} -``` - -In this example, the entry handler method passed to `handleEntry` will manage the map of static files in memory using their computed `urlPath` and the `contents`. If the config file changes (and thus a new default file or url path is specified) the plugin will clear the file map as well to remove artifacts. Furthermore, it uses the `server.http()` middleware to hook into the HTTP request handling. - -This example is heavily simplified, but it demonstrates how the different key parts of `scope` can be used together to provide a performant and reactive application experience. - -## Function: `handleApplication(scope: Scope): void | Promise` - -Parameters: - -- `scope` - [`Scope`](#class-scope) - An instance of the `Scope` class that provides access to the relative application's configuration, resources, and other APIs. - -Returns: `void | Promise` - -This is the only method a plugin module must export. It can be async and is awaited by the component loader. The `scope` argument provides access to the relative application's configuration, resources, and other APIs. 
- -## Class: `Scope` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -### Event: `'close'` - -Emitted after the scope is closed via the `close()` method. - -### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -### Event: `'ready'` - -Emitted when the Scope is ready to be used after loading the associated config file. It is awaited by the component loader, so it is not necessary to await it within the `handleApplication()` method. - -### `scope.close()` - -Returns: `this` - The current `Scope` instance. - -Closes all associated entry handlers, the associated `scope.options` instance, emits the `'close'` event, and then removes all other listeners on the instance. - -### `scope.handleEntry([files][, handler])` - -Parameters: - -- `files` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) | [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ -- `handler` - [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ - -Returns: [`EntryHandler`](#class-entryhandler) - An instance of the `EntryHandler` class that can be used to handle entries within the scope. - -The `handleEntry()` method is the key to handling file system entries specified by a `files` glob pattern option in `config.yaml`. This method is used to register an entry event handler, specifically for the `EntryHandler` [`'all'`](#event-all) event. The method signature is very flexible, and allows for the following variations: - -- `scope.handleEntry()` (with no arguments) Returns the default `EntryHandler` created by the `files` and `urlPath` options in the `config.yaml`. 
-- `scope.handleEntry(handler)` (where `handler` is an `onEntryEventHandler`) Returns the default `EntryHandler` instance (based on the options within `config.yaml`) and uses the provided `handler` for the [`'all'`](#event-all) event. -- `scope.handleEntry(files)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`) Returns a new `EntryHandler` instance that handles the specified `files` configuration. -- `scope.handleEntry(files, handler)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`, and `handler` is an `onEntryEventHandler`) Returns a new `EntryHandler` instance that handles the specified `files` configuration and uses the provided `handler` for the [`'all'`](#event-all) event. - -For example: - -```js -export function handleApplication(scope) { - // Get the default EntryHandler instance - const defaultEntryHandler = scope.handleEntry(); - - // Assign a handler for the 'all' event on the default EntryHandler - scope.handleEntry((entry) => { - /* ... */ - }); - - // Create a new EntryHandler for the 'src/**/*.js' files option with a custom `'all'` event handler. - const customEntryHandler = scope.handleEntry( - { - files: 'src/**/*.js', - }, - (entry) => { - /* ... */ - } - ); - - // Create another custom EntryHandler for the 'src/**/*.ts' files option, but without a `'all'` event handler. - const anotherCustomEntryHandler = scope.handleEntry({ - files: 'src/**/*.ts', - }); -} -``` - -And thus, if the previous code was used by a component with the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -``` - -Then the default `EntryHandler` instances would be created to handle all entries within the `web` directory. - -### `scope.requestRestart()` - -Returns: `void` - -Request a Harper restart. This **does not** restart the instance immediately, but rather indicates to the user that a restart is required. 
This should be called when the plugin cannot handle the entry event and wants to indicate to the user that the Harper instance should be restarted. - -This method is called automatically by the `scope` instance if the user has not defined an `scope.options.on('change')` handler or if an event handler exists and is missing a necessary handler method. - -### `scope.resources` - -Returns: `Map` - A map of the currently loaded [Resource](../globals#resource) instances. - -### `scope.server` - -Returns: `server` - A reference to the [server](../globals#server) global API. - -### `scope.options` - -Returns: [`OptionsWatcher`](#class-optionswatcher) - An instance of the `OptionsWatcher` class that provides access to the application's configuration options. Emits `'change'` events when the respective plugin part of the component's config file is modified. - -For example, if the plugin `customPlugin` is configured by an application with: - -```yaml -customPlugin: - files: 'foo.js' -``` - -And has the following `handleApplication(scope)` implementation: - -```typescript -export function handleApplication(scope) { - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files') { - // Handle the change in the files option - scope.logger.info(`Files option changed to: ${value}`); - } - }); -} -``` - -Then modifying the `files` option in the `config.yaml` to `bar.js` would log the following: - -```plaintext -Files option changed to: bar.js -``` - -### `scope.logger` - -Returns: `logger` - A scoped instance of the [`logger`](../globals#logger) class that provides logging capabilities for the plugin. - -It is recommended to use this instead of the `logger` global. - -### `scope.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -### `scope.directory` - -Returns: `string` - The directory of the application. 
This is the root directory of the component where the `config.yaml` file is located. - -## Interface: `FilesOption` - -- `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - -## Interface: `FilesOptionObject` - -- `source` - `string` | `string[]` - _required_ - The glob pattern string or array of strings. -- `ignore` - `string` | `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. - -## Interface: `FileAndURLPathConfig` - -- `files` - [`FilesOption`](#interface-filesoption) - _required_ - A glob pattern string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the plugin. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - -## Class: `OptionsWatcher` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -### Event: `'change'` - -- `key` - `string[]` - The key of the changed option split into parts (e.g. `foo.bar` becomes `['foo', 'bar']`). -- `value` - [`ConfigValue`](#interface-configvalue) - The new value of the option. -- `config` - [`ConfigValue`](#interface-configvalue) - The entire configuration object of the plugin. - -The `'change'` event is emitted whenever an configuration option is changed in the configuration file relative to the application and respective plugin. - -Given an application using the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -otherPlugin: - files: 'index.js' -``` - -The `scope.options` for the respective plugin's `customPlugin` and `otherPlugin` would emit `'change'` events when the `files` options relative to them are modified. 
- -For example, if the `files` option for `customPlugin` is changed to `web/**/*.js`, the following event would be emitted _only_ within the `customPlugin` scope: - -```js -scope.options.on('change', (key, value, config) => { - key; // ['files'] - value; // 'web/**/*.js' - config; // { files: 'web/**/*.js' } -}); -``` - -### Event: `'close'` - -Emitted when the `OptionsWatcher` is closed via the `close()` method. The watcher is not usable after this event is emitted. - -### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -### Event: `'ready'` - -- `config` - [`ConfigValue`](#interface-configvalue) | `undefined` - The configuration object of the plugin, if present. - -This event can be emitted multiple times. It is first emitted upon the initial load, but will also be emitted after restoring a configuration file or configuration object after a `'remove'` event. - -### Event: `'remove'` - -The configuration was removed. This can happen if the configuration file was deleted, the configuration object within the file is deleted, or if the configuration file fails to parse. Once restored, the `'ready'` event will be emitted again. - -### `options.close()` - -Returns: `this` - The current `OptionsWatcher` instance. - -Closes the options watcher, removing all listeners and preventing any further events from being emitted. The watcher is not usable after this method is called. - -### `options.get(key)` - -Parameters: - -- `key` - `string[]` - The key of the option to get, split into parts (e.g. `foo.bar` is represented as `['foo', 'bar']`). - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -If the config is defined it will attempt to retrieve the value of the option at the specified key. If the key does not exist, it will return `undefined`. - -### `options.getAll()` - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -Returns the entire configuration object for the plugin. 
If the config is not defined, it will return `undefined`. - -### `options.getRoot()` - -Returns: [`Config`](#interface-config) | `undefined` - -Returns the root configuration object of the application. This is the entire configuration object, basically the parsed form of the `config.yaml`. If the config is not defined, it will return `undefined`. - -### Interface: `Config` - -- `[key: string]` [`ConfigValue`](#interface-configvalue) - -An object representing the `config.yaml` file configuration. - -### Interface: `ConfigValue` - -- `string` | `number` | `boolean` | `null` | `undefined` | `ConfigValue[]` | [`Config`](#interface-config) - -Any valid configuration value type. Essentially, the primitive types, an array of those types, or an object comprised of values of those types. - -## Class: `EntryHandler` - -Extends: [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -Created by calling [`scope.handleEntry()`](#scopehandleentry) method. - -### Event: `'all'` - -- `entry` - [`FileEntry`](#interface-fileentry) | [`DirectoryEntry`](#interface-directoryentry) - The entry that was added, changed, or removed. - -The `'all'` event is emitted for all entry events, including file and directory events. This is the event that the handler method in `scope.handleEntry` is registered for. The event handler receives an `entry` object that contains the entry metadata, such as the file contents, URL path, and absolute path. 
- -An effective pattern for this event is: - -```js -async function handleApplication(scope) { - scope.handleEntry((entry) => { - switch (entry.eventType) { - case 'add': - // Handle file addition - break; - case 'change': - // Handle file change - break; - case 'unlink': - // Handle file deletion - break; - case 'addDir': - // Handle directory addition - break; - case 'unlinkDir': - // Handle directory deletion - break; - } - }); -} -``` - -### Event: `'add'` - -- `entry` - [`AddFileEvent`](#interface-addfileevent) - The file entry that was added. - -The `'add'` event is emitted when a file is created (or the watcher sees it for the first time). The event handler receives an `AddFileEvent` object that contains the file contents, URL path, absolute path, and other metadata. - -### Event: `'addDir'` - -- `entry` - [`AddDirectoryEvent`](#interface-adddirectoryevent) - The directory entry that was added. - -The `'addDir'` event is emitted when a directory is created (or the watcher sees it for the first time). The event handler receives an `AddDirectoryEvent` object that contains the URL path and absolute path of the directory. - -### Event: `'change'` - -- `entry` - [`ChangeFileEvent`](#interface-changefileevent) - The file entry that was changed. - -The `'change'` event is emitted when a file is modified. The event handler receives a `ChangeFileEvent` object that contains the updated file contents, URL path, absolute path, and other metadata. - -### Event: `'close'` - -Emitted when the entry handler is closed via the [`entryHandler.close()`](#entryhandlerclose) method. - -### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -### Event: `'ready'` - -Emitted when the entry handler is ready to be used. This is not automatically awaited by the component loader, but also is not required. Calling `scope.handleEntry()` is perfectly sufficient. 
This is generally useful if you need to do something _after_ the entry handler is absolutely watching and handling entries. - -### Event: `'unlink'` - -- `entry` - [`UnlinkFileEvent`](#interface-unlinkfileevent) - The file entry that was deleted. - -The `'unlink'` event is emitted when a file is deleted. The event handler receives an `UnlinkFileEvent` object that contains the URL path and absolute path of the deleted file. - -### Event: `'unlinkDir'` - -- `entry` - [`UnlinkDirectoryEvent`](#interface-unlinkdirectoryevent) - The directory entry that was deleted. - -The `'unlinkDir'` event is emitted when a directory is deleted. The event handler receives an `UnlinkDirectoryEvent` object that contains the URL path and absolute path of the deleted directory. - -### `entryHandler.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -The name of the plugin. - -### `entryHandler.directory` - -Returns: `string` - -The directory of the application. This is the root directory of the component where the `config.yaml` file is located. - -### `entryHandler.close()` - -Returns: `this` - The current `EntryHandler` instance. - -Closes the entry handler, removing all listeners and preventing any further events from being emitted. The handler can be started again using the [`entryHandler.update()`](#entryhandlerupdateconfig) method. - -### `entryHandler.update(config)` - -Parameters: - -- `config` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) - The configuration object for the entry handler. - -This method will update an existing entry handler to watch new entries. It will close the underlying watcher and create a new one, but will maintain any existing listeners on the EntryHandler instance itself. - -This method returns a promise associated with the ready event of the updated handler. 
- -### Interface: `BaseEntry` - -- `stats` - [`fs.Stats`](https://nodejs.org/docs/latest/api/fs.html#class-fsstats) | `undefined` - The file system stats for the entry. -- `urlPath` - `string` - The recommended URL path of the entry. -- `absolutePath` - `string` - The absolute path of the entry. - -The foundational entry handle event object. The `stats` may or may not be present depending on the event, entry type, and platform. - -The `urlPath` is resolved based on the configured pattern (`files:` option) combined with the optional `urlPath` option. This path is generally useful for uniquely representing the entry. It is used in the built-in components such as `jsResource` and `static`. - -The `absolutePath` is the file system path for the entry. - -### Interface: `FileEntry` - -Extends [`BaseEntry`](#interface-baseentry) - -- `contents` - `Buffer` - The contents of the file. - -A specific extension of the `BaseEntry` interface representing a file entry. We automatically read the contents of the file so the user doesn't have to bother with FS operations. - -There is no `DirectoryEntry` since there is no other important metadata aside from the `BaseEntry` properties. If a user wants the contents of a directory, they should adjust the pattern to resolve files instead. - -### Interface: `EntryEvent` - -Extends [`BaseEntry`](#interface-baseentry) - -- `eventType` - `string` - The type of entry event. -- `entryType` - `string` - The type of entry, either a file or a directory. - -A general interface representing the entry handle event objects. - -### Interface: `AddFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'add'` -- `entryType` - `'file'` - -Event object emitted when a file is created (or the watcher sees it for the first time). 
- -### Interface: `ChangeFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'change'` -- `entryType` - `'file'` - -Event object emitted when a file is modified. - -### Interface: `UnlinkFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'unlink'` -- `entryType` - `'file'` - -Event object emitted when a file is deleted. - -### Interface: `FileEntryEvent` - -- `AddFileEvent` | `ChangeFileEvent` | `UnlinkFileEvent` - -A union type representing the file entry events. These events are emitted when a file is created, modified, or deleted. The `FileEntry` interface provides the file contents and other metadata. - -### Interface: `AddDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'addDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is created (or the watcher sees it for the first time). - -### Interface: `UnlinkDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'unlinkDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is deleted. - -### Interface: `DirectoryEntryEvent` - -- `AddDirectoryEvent` | `UnlinkDirectoryEvent` - -A union type representing the directory entry events. There are no change events for directories since they are not modified in the same way as files. - -### Function: `onEntryEventHandler(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` - -Parameters: - -- `entryEvent` - [`FileEntryEvent`](#interface-fileentryevent) | [`DirectoryEntryEvent`](#interface-directoryentryevent) - -Returns: `void` - -This function is what is passed to the `scope.handleEntry()` method as the handler for the `'all'` event. This is also applicable to a custom `.on('all', handler)` method for any `EntryHandler` instance. 
diff --git a/versioned_docs/version-4.6/reference/content-types.md b/versioned_docs/version-4.6/reference/content-types.md deleted file mode 100644 index b7d223f4..00000000 --- a/versioned_docs/version-4.6/reference/content-types.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -Harper supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations. - -:::tip Need a custom content type? - -Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples. - -::: - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. 
When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and Harper's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. 
CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/versioned_docs/version-4.6/reference/data-types.md b/versioned_docs/version-4.6/reference/data-types.md deleted file mode 100644 index 8dd902e6..00000000 --- a/versioned_docs/version-4.6/reference/data-types.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allows for all of Harper supported data types. [Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and is used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. 
- -## Number - -Numbers can be stored as signed integers up to a 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type name are supported: - -- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double") -- `Int` - Any integer between from -2147483648 to 2147483647 -- `Long` - Any integer between from -9007199254740992 to 9007199254740992 -- `BigInt` - Any integer (negative or positive) with less than 300 digits - -Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately. - -## Object/Map - -Objects, or maps, that hold a set named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. The order of properties is also preserved in Harper’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in Harper property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. 
- -## Binary Data - -Binary data can be stored in property values as well, with two different data types that are available: - -### Bytes - -JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -### Blobs - -Binary data can also be stored with [`Blob`s](./blob), which can scale much better for larger content than `Bytes`, as it is designed to be streamed and does not need to be held entirely in memory. It is recommended that `Blob`s are used for content larger than 20KB. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can’t be represented with JSON, but can be with CBOR. diff --git a/versioned_docs/version-4.6/reference/dynamic-schema.md b/versioned_docs/version-4.6/reference/dynamic-schema.md deleted file mode 100644 index 161c3b13..00000000 --- a/versioned_docs/version-4.6/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. 
Harper tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -Harper databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -Harper tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in Harper operations API. - -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](./storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data the value of that new attribute will be assumed `null` for all existing records. 
- -**Audit Attributes** - -Harper automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. - -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. 
- -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.6/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.6/reference/globals.md b/versioned_docs/version-4.6/reference/globals.md deleted file mode 100644 index bb2bb831..00000000 --- a/versioned_docs/version-4.6/reference/globals.md +++ /dev/null @@ -1,424 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with Harper is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. 
To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -## `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! 
- -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... -} -``` - -## `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -## `Resource` - -This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](./resources/) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. 
See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP, networking, and authentication services. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -### `Request` and `Response` - -The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standard-based APIs to facilitate reuse with modern web code. While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resources/) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). 
However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the the Harper server environment: - -#### `Request` - -A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties: - -- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. -- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. -- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. -- `protocol` - This is the protocol of the request, like `http` or `https`. -- `data` - This is the deserialized body of the request (based on the type of data specified by `Content-Type` header). -- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). -- `host` - This is the host of the request, like `example.com`. -- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`. 
This is generally most helpful in a cache resolution function, where you can send hints _if_ the data is not in the cache and is resolving from an origin: - -```javascript -class Origin { - async get(request) { - // if we are fetching data from origin, send early hints - this.getContext().requestContext.sendEarlyHints(''); - let response = await fetch(request); - ... - } -} -Cache.sourcedFrom(Origin); -``` - -- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that sends the cookie in requests will be authenticated as the user that logged in and the session record will be attached to the request. This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. -- `session` - This is the session object that is associated with current cookie-maintained session. This object is used to store session data for the current session. This is `Table` record instance, and can be updated by calling `request.session.update({ key: value })` or session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. -- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depends on the layered `Request` call with `Response` return pattern. 
-- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged and can cause problems for middleware, should only be used if you are certain that other server handlers will not attempt to return a different `Response` object. - -#### `Response` - -REST methods can directly return data that is serialized and returned to users, or it can return a `Response` object (or a promise to a `Response`), or it can return a `Response`-like object with the following properties (or again, a promise to it): - -- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. -- `data` - This is the data to be returned of the response. This will be serialized using Harper's content negotiation. -- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). - -#### `HttpOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](./globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. 
- -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. -- The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -- The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](./globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the Harper Next.js component. See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. 
Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through [analytics API](./analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. - -### `server.getUser(username): Promise` - -This returns the user object with permissions/authorization information based on the provided username. This does not verify the password, so it is generally used for looking up users by username. If you want to verify a user by password, use [`server.authenticateUser`](./globals#serverauthenticateuserusername-password-user). 
- -### `server.authenticateUser(username, password): Promise` - -This returns the user object with permissions/authorization information based on the provided username. The password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -### `server.resources: Resources` - -This provides access to the map of all registered resources. This is the central registry in Harper for registering any resources to be exported for use by REST, MQTT, or other components. Components that want to register resources should use the `server.resources.set(name, resource)` method to add to this map. Exported resources can be found by passing in a path to `server.resources.getMatch(path)` which will find any resource that matches the path or beginning of the path. - -#### `server.resources.set(name, resource, exportTypes?)` - -Register a resource with the server. For example: - -``` -class NewResource extends Resource { -} -server.resources.set('NewResource', Resource); -/ or limit usage: -server.resources.set('NewResource', Resource, { rest: true, mqtt: false, 'my-protocol': true }); -``` - -#### `server.resources.getMatch(path, exportType?)` - -Find a resource that matches the path. For example: - -``` -server.resources.getMatch('/NewResource/some-id'); -/ or specify the export/protocol type, to allow it to be limited: -server.resources.getMatch('/NewResource/some-id', 'my-protocol'); -``` - -### `server.operation(operation: Object, context?: Object, authorize?: boolean)` - -Execute an operation from the [Operations API](../developers/operations-api) - -Parameters: - -- `operation` - `Object` - Object matching desired operation's request body -- `context` - `Object` - `{ username: string}` - _optional_ - The specified user -- `authorize` - `boolean` - _optional_ - Indicate the operation should authorize the user or not. 
Defaults to `false` - -Returns a `Promise` with the operation's response as per the [Operations API documentation](../developers/operations-api). - -### `server.nodes` - -Returns an array of node objects registered in the cluster - -### `server.shards` - -Returns map of shard number to an array of its associated nodes - -### `server.hostname` - -Returns the hostname of the current node - -### `server.contentTypes` - -Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. - -## `contentTypes` - -Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. - -HarperDB uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: - -1. **Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage -2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format - -### Built-in Content Types - -HarperDB includes handlers for common formats: - -- **JSON** (`application/json`) -- **CBOR** (`application/cbor`) -- **MessagePack** (`application/msgpack`) -- **CSV** (`text/csv`) -- **Event-Stream** (`text/event-stream`) -- And more... - -For detailed information about each built-in content type, including usage recommendations and performance characteristics, see the [Content Types reference](./content-types.md). - -### Custom Content Type Handlers - -You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). 
The map is keyed by MIME type, with values being handler objects containing these optional properties: - -#### Handler Properties - -- **`serialize(data: any): Buffer | Uint8Array | string`** - Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. - -- **`serializeStream(data: any): ReadableStream`** - Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. - -- **`deserialize(buffer: Buffer | string): any`** - Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. - -- **`deserializeStream(stream: ReadableStream): any`** - Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). - -- **`q: number`** _(default: 1)_ - Quality indicator between 0 and 1 representing serialization fidelity. Used in content negotiation to select the best format when multiple options are available. The server chooses the content type with the highest product of client quality × server quality values. - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` diff --git a/versioned_docs/version-4.6/reference/graphql.md b/versioned_docs/version-4.6/reference/graphql.md deleted file mode 100644 index cc43eec9..00000000 --- a/versioned_docs/version-4.6/reference/graphql.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: GraphQL Querying ---- - -# GraphQL Querying - -Harper supports GraphQL in a variety of ways. 
It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resources/). - -Get started by setting `graphql: true` in `config.yaml`. - -This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. - -> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it does not fully implement neither that specification nor the [GraphQL](https://spec.graphql.org/) specification. - -Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. - -For example, to request the GraphQL Query: - -```graphql -query GetDogs { - Dog { - id - name - } -} -``` - -The `GET` request would look like: - -```http -GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D+%7D -Accept: application/graphql-response+json -``` - -And the `POST` request would look like: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDogs { Dog { id name } } }" -} -``` - -> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. - -The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@exported` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resources/#query) for more complex queries. 
- -Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: - -```graphql -query GetDogsAndOwners { - Dog { - id - name - breed - } - - Owner { - id - name - occupation - } -} -``` - -This will return all dogs and owners in the database. And is equivalent to executing two REST queries: - -```http -GET /Dog/?select(id,name,breed) -# and -GET /Owner/?select(id,name,occupation) -``` - -### Request Parameters - -There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` - -1. `query` - _Required_ - The string representation of the GraphQL document. - 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. - 1. i.e. GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). - 1. If an shorthand, unnamed, or singular named query is provided, they will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. -1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter -1. `variables` - _Optional_ - A map of variable values to be used for the specified query - -### Type Checking - -The Harper GraphQL Querying system takes many liberties from the GraphQL specification. This extends to how it handle type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. - -In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. 
- -For example, the variable `$name: String!` states that `name` should be a non-null, string value. - -- If the request does not contain the `name` variable, an error will be returned -- If the request provides `null` for the `name` variable, an error will be returned -- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. -- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. - - If `null` is provided as the variable value, an error will still be returned. - - If the default value does not match the type specified (i.e. `$name: String! = 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. -- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. - -The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. - -- Objects will be transformed into properly nested attributes -- Strings and Boolean values are passed through as their AST values -- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. -- List and Enums are not supported. - -### Fragments - -The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. 
- -For example, in the query - -```graphql -query Get { - Dog { - ...DogFields - } -} - -fragment DogFields on Dog { - name - breed -} -``` - -The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). - -You can literally specify anything in the fragment and it will behave the same way: - -```graphql -fragment DogFields on Any { ... } # this is recommended -fragment DogFields on Cat { ... } -fragment DogFields on Animal { ... } -fragment DogFields on LiterallyAnything { ... } -``` - -As an actual example, fragments should be used for composition: - -```graphql -query Get { - Dog { - ...sharedFields - breed - } - Owner { - ...sharedFields - occupation - } -} - -fragment sharedFields on Any { - id - name -} -``` - -### Short Form Querying - -Any attribute can be used as an argument for a query. In this short form, multiple arguments is treated as multiple equivalency conditions with the default `and` operation. - -For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. - -```graphql -query GetDog($id: ID!) { - Dog(id: $id) { - name - breed - owner { - name - } - } -} -``` - -And as a properly formed request: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}", - "variables": { - "id": "0" - } -} -``` - -The REST equivalent would be: - -```http -GET /Dog/?id==0&select(name,breed,owner{name}) -# or -GET /Dog/0?select(name,breed,owner{name}) -``` - -Short form queries can handle nested attributes as well. 
- -For example, return all dogs who have an owner with the name `"John"` - -```graphql -query GetDog { - Dog(owner: { name: "John" }) { - name - breed - owner { - name - } - } -} -``` - -Would be equivalent to - -```http -GET /Dog/?owner.name==John&select(name,breed,owner{name}) -``` - -And finally, we can put all of these together to create semi-complex, equality based queries! - -The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. - -```graphql -query GetDog($dogName: String!, $ownerName: String!) { - Dog(name: $dogName, owner: { name: $ownerName }) { - name - breed - owner { - name - } - } -} -``` - -### Long Form Querying - -> Coming soon! - -### Mutations - -> Coming soon! - -### Subscriptions - -> Coming soon! - -### Directives - -> Coming soon! diff --git a/versioned_docs/version-4.6/reference/headers.md b/versioned_docs/version-4.6/reference/headers.md deleted file mode 100644 index 5c85fc88..00000000 --- a/versioned_docs/version-4.6/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Harper Headers ---- - -# Harper Headers - -All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. 
| diff --git a/versioned_docs/version-4.6/reference/index.md b/versioned_docs/version-4.6/reference/index.md deleted file mode 100644 index 4c5d867a..00000000 --- a/versioned_docs/version-4.6/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for Harper. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.6/reference/limits.md b/versioned_docs/version-4.6/reference/limits.md deleted file mode 100644 index 97214620..00000000 --- a/versioned_docs/version-4.6/reference/limits.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Harper Limits ---- - -# Harper Limits - -This document outlines limitations of Harper. - -## Database Naming Restrictions - -**Case Sensitivity** - -Harper database metadata (database names, table names, and attribute/column names) are case sensitive. Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -Harper limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. - -## Primary Keys - -The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shortest). 
diff --git a/versioned_docs/version-4.6/reference/resources/index.md b/versioned_docs/version-4.6/reference/resources/index.md deleted file mode 100644 index 82269149..00000000 --- a/versioned_docs/version-4.6/reference/resources/index.md +++ /dev/null @@ -1,796 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information during execution. Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. 
The general rule for how to interact with resources is:
Using a path that specifies an ID like `/MyResource/3492` will be mapped to an instance of MyResource,
For example: - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = false; // enable the updated API - async get(target) { - // fetch data from an external source, using our id - let response = await this.fetch(target.id); - // do something with the response - } - put(target, data) { - // send the data into the external source - } - delete(target) { - // delete an entity in the external data source - } - subscribe(subscription) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; // enable the updated API - get(target) { - // we can add properties or change properties before returning data: - return { ...super.get(target), newProperty: 'newValue', existingProperty: 42 }; // returns the record, with additional properties - } - put(target, data) { - // can change data any way we want - super.put(target, data); - } - delete(target) { - super.delete(target); - } - post(target, data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -#### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! - -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... 
-} -``` - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -#### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](./components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the [transactions documentation](./transactions) for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(target: RequestTarget | Id): Promise|AsyncIterable` - -This retrieves a record, or queries for records, and is called by HTTP GET requests. This can be called with a `RequestTarget` which can specify a path/id and query parameters as well as search parameters. For tables, this can also be called directly with an id (string or number) to retrieve a record by id. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. HTTP requests will always call `get` with a full `RequestTarget`. The default `get` method (`super.get(target)`) returns the current record as a plain object. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -class extends Resource { - static loadAsInstance = false; - get(target) { - let param1 = target.get('param1'); // returns 'value' - let id = target.id; // returns 'some-id' - let path = target.pathname; // returns /some-id - let fullTarget = target.target; // returns /some-id?param1=value - ... 
- } -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return the record identified by the path. If `get` is called on a collection (`/Table/?name=value`), the target will have the `isCollection` property set to `true` and default action is to `search` and return an AsyncIterable of results. - -### `search(query: RequestTarget)`: AsyncIterable - -This performs a query on this resource or table. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an `AsyncIterable` of results. The `query` object can be used to specify the desired query. - -### `put(target: RequestTarget | Id, data: object): void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(target, data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `patch(target: RequestTarget | Id, data: object): void|Response` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(target, data)`) updates the record. 
The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `update(target: RequestTarget, updates?: object): Updatable` - -This can be called to get an Updatable class for updating a record. An `Updatable` instance provides direct access to record properties as properties on `Updatable` instance. The properties can also be modified and any changes are tracked and written to the record when the transaction commits. For example, if we wanted to update the quantify of a product in the Product table, in response to a post, we could write: - -```javascript -class ... { - post(target, data) { - static loadAsInstance = false; - let updatable = this.update(target); - updatable.quantity = updatable.quantity - 1; - } -} -``` - -In addition, the `Updatable` class has the following methods. - -### `Updatable` class - -#### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. We could improve the example above to reliably ensure the quantity is decremented even when it occurs in multiple nodes simultaneously: - -```javascript -class ... { - static loadAsInstance = false; - post(target, data) { - let updatable = this.update(target); - updatable.addTo('quantity', -1); - } -} -``` - -#### `subtractFrom(property, value)` - -This functions exactly the same as `addTo`, except it subtracts the value. 
- -The `Updatable` also inherits the `getUpdatedTime` and `getExpiresAt` methods from the `RecordObject` class. - -### `delete(target: RequestTarget): void|Response` - -This will delete this record or resource identified by the target, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete(target)`) deletes the record identified by target from the table as part of the current transaction. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `publish(target: RequestTarget, message): void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(target, message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `post(target: RequestTarget, data: object): void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `invalidate(target: RequestTarget)` - -This method is available on tables. 
This will invalidate the specified record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.publish(message)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. 
-- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. 
This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used fulfilled many different requests, and relying on this first request context may not be representative of future requests. 
Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. 
Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(target: RequestTarget|Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(target: RequestTarget|Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). 
- -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(target: RequestTarget|Id, data: object, context?: Resource|Context): Promise|any` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(target: RequestTarget|Id, recordUpdate: object, context?: Resource|Context): Promise|void` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(target: RequestTarget|Id, context?: Resource|Context): Promise|void` - -Deletes this resource's record or data. 
Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(target: RequestTarget|Id, message: object, context?: Resource|Context): Promise|void` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: RequestTarget, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). 
- -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. 
By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include `estimatedRange` array with estimate range of the count. - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to multipart an array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). 
The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. 
This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). For example, a complex query might look like: - -For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. 
An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced/joined table.
For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified; here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -```
In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### `RequestTarget` - -The `RequestTarget` class is used to represent a URL path that can be mapped to a resource. This is used by the REST interface to map a URL path to a resource class. All REST methods are called with a `RequestTarget` as the first argument, which is used to determine which record or entry to access or modify. Methods on a `Resource` class can be called with a primary key as a string or number value as the first argument, to access or modify a record by primary key, which will work with all the default methods. The static methods will be transform the primary key to a `RequestTarget` instance to call the instance methods for argument normalization. -When RequestTarget is constructed with a URL path (from the REST methods). The static methods will also automatically parse the path to a `RequestTarget` instance, including parsing the search string into query parameters. 
-Below are the properties and methods of the `RequestTarget` class: - -- `pathname` - The path of the URL relative to the resource path that matched this request. This excluded the query/search string -- `toString()` - The full relative path and search string of the URL -- `search` - The search/query part the target path (the part after the first `?` character) -- `id` - The primary key of the resource, as determined by the path -- `checkPermission` - This property is set to an object indicating that a permission check should be performed on the - resource. This is used by the REST interface to determine if a user has permission to access the resource. The object - contains: - - `action` - The type of action being performed (read/write/delete) - - `resource` - The resource being accessed - - `user` - The user requesting access - -`RequestTarget` is subclass of `URLSearchParams`, and these methods are available for accessing and modifying the query parameters: - -- `get(name: string)` - Get the value of the query parameter with the specified name -- `getAll(name: string)` - Get all the values of the query parameter with the specified name -- `set(name: string, value: string)` - Set the value of the query parameter with the specified name -- `append(name: string, value: string)` - Append the value to the query parameter with the specified name -- `delete(name: string)` - Delete the query parameter with the specified name -- `has(name: string)` - Check if the query parameter with the specified name exists - -In addition, the `RequestTarget` class is an iterable, so you can iterate through the query parameters: - -- `for (let [name, value] of target)` - Iterate through the query parameters - -When a `RequestTarget` has query parameters using Harper's extended query syntax, the REST static methods will parse the `RequestTarget` and potentially add any of the following properties if they are present in the query: - -- `conditions` - An array of conditions that will be 
used to filter the query results -- `limit` - The limit of the number of records to return -- `offset` - The number of records to skip before returning the results -- `sort` - The sort order of the query results -- `select` - The properties to return in the query results - -### `RecordObject` - -The `get` method will return a `RecordObject` instance, which is an object containing all the properties of the record. Any property on the record can be directly accessed and the properties can be enumerated with standard JS capabilities like `for`-`in` and `Object.keys`. The `RecordObject` instance will also have the following methods: - -- `getUpdatedTime()` - Get the last updated time (the version number) of the record -- `getExpiresAt()` - Get the expiration time of the entry, if there is one. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, you can interact through standard CRUD/REST methods to create, read, update, and delete records. You can idiomatic property access and modification to interact with the records themselves. 
For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our own `get()` we can interact with the record: - -```javascript -export class CustomProduct extends Product { - async get(target) { - let record = await super.get(target); - let name = record.name; // this is the name of the current product - let rating = record.rating; // this is the rating of the current product - // we can't directly modify the record (it is frozen), but we can copy if we want to return a modification - record = { ...record, rating: 3 }; - return record; - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -// if we want to update a single property: -await Product.patch(1, { rating: 3 }); -``` - -When running inside a transaction, we can use the `update` method and updates are automatically saved when a request completes: - -```javascript -export class CustomProduct extends Product { - post(target, data) { - let record = this.update(target); - record.name = data.name; - record.description = data.description; - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let record = this.update(target); - let brandName = record.brand.name; - let firstVariationPrice = record.variations[0].price; - let additionalInfoOnBrand = record.brand.additionalInfo; // not defined in schema, but can still try to access property - // make some changes - record.variations.splice(0, 1); // remove first variation - record.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - record.brand.name = 'new brand name'; - // all these change will be saved - } -} -``` - -If you need to delete a property, you can do with the `delete` method: - -```javascript -let product1 = await Product.update(1); -product1.delete('additionalInformation'); -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. For example, you could return a redirect response like: - -```javascript -return `{ status: 302, headers: { Location: '/new-location' }` }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). 
For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.6/reference/resources/instance-binding.md b/versioned_docs/version-4.6/reference/resources/instance-binding.md deleted file mode 100644 index 45bbcd47..00000000 --- a/versioned_docs/version-4.6/reference/resources/instance-binding.md +++ /dev/null @@ -1,721 +0,0 @@ ---- -title: Resource Class with Resource Instance Binding behavior ---- - -# Resource Class with Resource Instance Binding behavior - -This document describes the legacy instance binding behavior of the Resource class. It is recommended that you use the [updated behavior of the Resource API](./) instead, but this legacy API is preserved for backwards compatibility. 
- -## Resource Class - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = true; - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper table to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if you are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the transactions section below for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. 
- -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make sure you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query): Resource|void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. 
Therefore, calling `super.put(data)` updates this specific record/resource, not other records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object): Resource|void|Response` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing record's properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?): Resource|void|Response` - -This will delete this record or resource, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. 
The default `delete` method on tables (`super.delete(record)`) deletes the record from the table as part of the current transaction. - -### `publish(message): Resource|void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query): Resource|void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can iterate through with a `for await` loop. 
It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. 
- -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. 
The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `addTo(property, value)` - -This adds to provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. 
-- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). 
- -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. 
Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. 
The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). 
However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. 
The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. 
- -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include an `estimatedRange` array with the estimated range of the count. - -### `parsePath(path, context, query)` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string. 
You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. 
We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](../transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). 
For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../../developers/applications/defining-schemas) (in addition to the [schema documentation](../../developers/applications/defining-schemas), see the [REST documentation](../../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`. 
- -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify an `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', `{ name: 'related', select: ['description', 'id'] }` ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. 
For example to just return an iterator of the `id`s of objects: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. 
Failing to iterate the results this will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can direct access or modify properties through standard property syntax. For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be getter/setter definition for direct property 
access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the \`update()\`\` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand; - variations: [Variation]; -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these change will be saved - } -} -``` - -If you need to delete a property, you can do with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it is belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. 
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.6/reference/resources/migration.md b/versioned_docs/version-4.6/reference/resources/migration.md deleted file mode 100644 index 51ec4c83..00000000 --- a/versioned_docs/version-4.6/reference/resources/migration.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: Migration to Resource API version 2 (non-instance binding) ---- - -# Migration to Resource API version 2 (non-instance binding) - -The Resource API was inspired by two major design ideas: the REST architectural design and the [Active Record pattern](https://en.wikipedia.org/wiki/Active_record_pattern) (made popular by Ruby on Rails and heavily used as a pattern in many ORMs). 
The basic design goal of the Resource API is to integrate these concepts into a single construct that can directly map RESTful methods (specifically the "uniform interface" of HTTP) to an active record data model. However, while the active record pattern has been for _consumption_ of data, implementing methods for endpoint definitions and caching sources as a data _provider_ can be confusing and cumbersome to implement. The updated non-instance binding Resource API is designed to make it easier and more consistent to implement a data provider and interact with records across a table, while maintaining more explicit control over what data is loaded and when. - -The updated Resource API is enabled on a per-class basis by setting static `loadAsInstance` property to `false`. When this property is set to `false`, this means that the Resource instances will not be bound to a specific record. Instead instances represent the whole table, capturing the context and current transactional state. Any records in the table can be loaded or modified from `this` instance. There are a number of implications and different behaviors from a Resource class with `static loadAsInstance = false`: - -- The `get` method (both static and instance) will directly return the record, a frozen enumerable object with direct properties, instead of a Resource instance. -- When instance methods are called, there will not be any record preloaded beforehand and the resource instance will not have properties mapped to a record. -- All instance methods accept a `target`, an instance of `RequestTarget`, as the first argument, which identifies the target record or query. - - The `target` will have an `id` property identifying the target resource, along with target information. - - The `getId()` method is no longer used and will return `undefined`. - - The `target` will provide access to query parameters, search operators, and other directives. 
- - A `target` property of `checkPermission` indicates that a method should check the permission of the request before proceeding. The default instance methods provide the default authorization behavior. - - This supplants the need for `allowRead`, `allowUpdate`, `allowCreate`, and `allowDelete` methods, which shouldn't need to be used (and don't provide the id of the target record). -- Any data from a POST, PUT, and PATCH request will be available in the second argument. This reverses the order of the arguments to `put`, `post`, and `patch` compared to the legacy Resource API. -- Context is tracked using asynchronous context tracking, and will automatically be available to calls to other resources. This can be disabled by setting `static explicitContext = true`, which can improve performance. -- The `update` method will return an `Updatable` object (instead of a Resource instance), which provides properties mapped to a record, but these properties can be updated and changes will be saved when the transaction is committed. - -The following are examples of how to migrate to the non-instance binding Resource API. 
- -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - async get(query) { - let id = this.getId(); // get the id - if (query?.size > 0) { - // check number of query parameters - let idWithQuery = id + query.toString(); // add query parameters - let resource = await tables.MyData.get(idWithQuery, this); // retrieve another record - resource.newProperty = 'value'; // assign a new value to the returned resource instance - return resource; - } else { - this.newProperty = 'value'; // assign a new value to this instance - return super.get(query); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - let id = target.id; // get the id - let record; - if (target.size > 0) { - // check number of query parameters - let idWithQuery = target.toString(); // this is the full target with the path query parameters - // we can retrieve another record from this table directly with this.get/super.get or with tables.MyData.get - record = await super.get(idWithQuery); - } else { - record = await super.get(target); // we can just directly use the target as well - } - // the record itself is frozen, but we can copy/assign to a new object with additional properties if we want - return { ...record, newProperty: 'value' }; - } -} -``` - -Here is an example of the preferred approach for authorization: -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - allowRead(user) { - // allow any authenticated user - return user ? 
true : false; - } - async get(query) { - // any get logic - return super.get(query); - } -} -``` - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - // While you can still use allowRead, it is not called before get is called, and it is generally encouraged - // to perform/call authorization explicitly in direct get, put, post methods rather than using allow* methods. - if (!this.getContext().user) throw new Error('Unauthorized'); - target.checkPermissions = false; // authorization complete, no need to further check permissions below - // target.checkPermissions is set to true or left in place, this default get method will perform the default permissions checks - return super.get(target); // we can just directly use the query as well - } -} -``` - -Here is an example of how to convert/upgrade an implementation of a `post` method: -Previous code with a `post` method: - -```javascript -export class MyData extends tables.MyData { - async post(data, query) { - let resource = await tables.MyData.get(data.id, this); - if (resource) { - // update a property - resource.someProperty = 'value'; - // or - tables.MyData.patch(data.id, { someProperty: 'value' }, this); - } else { - // create a new record - MyData.create(data, this); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - // IMPORTANT: arguments are reversed: - async post(target, data) { - let record = await this.get(data.id); - if (record) { - // update a property - const updatable = await this.update(data.id); // we can alternately pass a target to update - updatable.someProperty = 'value'; - // or - this.patch(data.id, { someProperty: 'value' }); - } else { - // create a new record - this.create(data); - } - } -} -``` diff --git a/versioned_docs/version-4.6/reference/resources/query-optimization.md 
b/versioned_docs/version-4.6/reference/resources/query-optimization.md deleted file mode 100644 index 139b862b..00000000 --- a/versioned_docs/version-4.6/reference/resources/query-optimization.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Query Optimization ---- - -## Query Optimization - -Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. It is important to understand how querying works to help you optimize your queries for the best performance. - -### Query Execution - -At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database and delivering the results based on required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. When a field is not indexed, a query specifies a condition on that field, and the database check each potential record to determine if it matches the condition. - -When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default, an `and` operator, matching records must all match all conditions), Harper will attempt to to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data and then filter based on the remaining conditions. If a condition can search an indexed field, with a selective condition, it will be used before conditions that aren't indexed, or as selective. 
The `search` method includes an `explain` flag that can be used to return a query execution order to understand how the query is being executed. This can be useful for debugging and optimizing queries. - -For a union query, each condition is executed separately and the results are combined/merged. - -### Condition, Operators, and Indexing - -When a query is performed, the conditions specified in the query are evaluated against the data in the database. The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The use of these operators can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions with an indexed field, the database will have to check every record with a full table scan and can be very slow for large datasets (it will get slower as the dataset grows, `O(n)`). - -The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations. This is because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries. - -The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted listed of values, so the greater than and less than operators will also utilize indexed fields when possible. 
If the range is narrow, these operations can be very fast. A wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it can quickly find the correct matching entries in the sorted index. On the other hand, the `contains` and `ends_with` and not equal (`!=` or `not_equal`) operators can not leverage the indexes, so they will require a full table scan to find the matching records if they are not used in conjunction with a selective/indexed condition. There is a special case of `!= null` which can use indexes to find non-null records. However, this is generally only helpful for sparse fields where a small subset are non-null values. More generally, operators are more efficient if they are selecting on fields with a high cardinality. - -Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes). In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database, and doesn't require cross-referencing to the main records. - -### Relationships/Joins - -Harper supports relationships between tables, allowing for "join" queries. This does result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions which use relationships. Indexed fields and comparators that leverage the ordering are still valuable for performance. It is also important that if a condition on a table is connected to another table's foreign key, that that foreign key also be indexed. Likewise, if a query `select`s data from a related table that uses a foreign key to relate, that key should also be indexed. The same principles of higher cardinality apply here as well, more unique values allow for efficient lookups. 
- -### Sorting - -Queries can also specify a sort order. This can also significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. A sort order can be used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as they do to conditions. Sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order. - -### Streaming - -One of the unique and powerful features of Harper's querying functionality is the ability to stream query results. When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending the initial data before the entire query is complete (improving time-to-first-byte speed, for example). However, using a sort order on a query with conditions that are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits. diff --git a/versioned_docs/version-4.6/reference/roles.md b/versioned_docs/version-4.6/reference/roles.md deleted file mode 100644 index 2e3dc570..00000000 --- a/versioned_docs/version-4.6/reference/roles.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Roles ---- - -# Roles - -Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. 
When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary. - -## Configuring Roles - -Point to a roles configuration file from your application’s `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -You can declare one or more files. Each file should define one or more roles in YAML format. - -## Roles File Structure - -A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain: - -- **super_user** – a boolean that grants all permissions. -- **databases** – one or more databases the role has access to. -- **tables** – within each database, table-level and attribute-level permissions. - -**Full Example** - -```yaml -<role_name>: - super_user: <boolean> # optional - <database_name>: - <table_name>: - read: <boolean> - insert: <boolean> - update: <boolean> - delete: <boolean> - attributes: - <attribute_name>: - read: <boolean> - insert: <boolean> - update: <boolean> -``` - -## Role Flags - -- `super_user: true` — grants full system access. -- `super_user: false` — the role only has the explicit permissions defined in the role. - -## Database and Table Permissions - -Within each role, you may specify one or more databases. Each database can declare permissions for tables. - -Example: - -```yaml -analyst: - super_user: false - data: - Sales: - read: true - insert: false - update: false - delete: false -``` - -In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database. - -## Attribute-Level Permissions - -You can also grant or deny access at the attribute level within a table. - -Example: - -```yaml -editor: - data: - Articles: - read: true - insert: true - update: true - attributes: - title: - read: true - update: true - author: - read: true - update: false -``` - -Here, the `editor` role can update the `title` of an article but cannot update the `author`. 
- -## Multiple Roles - -Roles can be defined side by side in a single file: - -```yaml -reader: - super_user: false - data: - Dog: - read: true - -writer: - super_user: false - data: - Dog: - insert: true - update: true -``` - -## Behavior on Startup - -- If a declared role does not exist, Harper creates it. -- If a declared role already exists, Harper updates its permissions to match the definition. -- Roles are enforced consistently across deployments, keeping access control in sync with your application code. diff --git a/versioned_docs/version-4.6/reference/sql-guide/date-functions.md b/versioned_docs/version-4.6/reference/sql-guide/date-functions.md deleted file mode 100644 index c9747dcd..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. 
- -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. - -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. - -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.6/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.6/reference/sql-guide/features-matrix.md deleted file mode 100644 index 7766faa4..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## SQL Features Matrix - -Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. - -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare 
columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.6/reference/sql-guide/functions.md b/versioned_docs/version-4.6/reference/sql-guide/functions.md deleted file mode 100644 index 02fff906..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Harper SQL Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Functions - -This SQL keywords reference contains the SQL functions available in Harper. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. 
| -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). - -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. 
| -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. 
| -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. | -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. 
| - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. | - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. 
If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. | - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. 
| -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. | - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. 
| -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. | - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. 
| diff --git a/versioned_docs/version-4.6/reference/sql-guide/index.md b/versioned_docs/version-4.6/reference/sql-guide/index.md deleted file mode 100644 index 52f245ab..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## Harper SQL Guide - -The purpose of this guide is to describe the available functionality of Harper as it relates to supported SQL functionality. The SQL parser is still actively being developed; many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality become available. Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -Harper adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays.
- -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -Harper supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -Harper allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN -- LEFT INNER JOIN -- LEFT OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.6/reference/sql-guide/json-search.md b/versioned_docs/version-4.6/reference/sql-guide/json-search.md deleted file mode 100644 index 1c0c396b..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -Harper automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document.
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.6/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.6/reference/sql-guide/reserved-word.md deleted file mode 100644 index 2cd812ba..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: Harper SQL Reserved Words ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/versioned_docs/version-4.6/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.6/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index bf7f542f..00000000 --- a/versioned_docs/version-4.6/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and use cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: [https://geojson.org/](https://geojson.org/). There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -1. Coordinates or GeoJSON geometries must be passed as a string when written directly in a SQL statement. -1. Note if you are using Postman for your testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. 
| - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. - -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. 
- -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. 
- -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. 
- -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. | - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain Harper Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see [https://developers.arcgis.com/documentation/spatial-references/](https://developers.arcgis.com/documentation/spatial-references/). Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. 
| - -### Example - -Find Harper Headquarters within all locations within the database. - -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. 
Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "Harper Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.6/reference/storage-algorithm.md b/versioned_docs/version-4.6/reference/storage-algorithm.md deleted file mode 100644 index 99525536..00000000 --- a/versioned_docs/version-4.6/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The Harper storage algorithm is fundamental to the Harper core functionality, enabling the [Dynamic Schema](./dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within Harper. - -## Query Language Agnostic - -The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. 
Each Harper table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. Harper tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](./dynamic-schema) reflexively creates both the attribute and index as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## Harper Indexing Example (Single Table) - -![](/img/v4.6/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.6/reference/transactions.md b/versioned_docs/version-4.6/reference/transactions.md deleted file mode 100644 index 7e8546fb..00000000 --- a/versioned_docs/version-4.6/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. 
Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. Understanding how transactions are tracked and behave is important for properly leveraging transactional support in Harper. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. 
However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. - -Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). 
- -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. diff --git a/versioned_docs/version-4.7/administration/_category_.json b/versioned_docs/version-4.7/administration/_category_.json deleted file mode 100644 index 59c33ea4..00000000 --- a/versioned_docs/version-4.7/administration/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Administration", - "position": 2, - "link": { - "type": "generated-index", - "title": "Administration Documentation", - "description": "Guides for managing and administering HarperDB instances", - "keywords": ["administration"] - } -} diff --git a/versioned_docs/version-4.7/administration/administration.md b/versioned_docs/version-4.7/administration/administration.md deleted file mode 100644 index e0084bb5..00000000 --- a/versioned_docs/version-4.7/administration/administration.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Best Practices and Recommendations ---- - -# Best Practices and Recommendations - -Harper is designed for minimal administrative effort, and with managed services these are handled for you. But there are important things to consider for managing your own Harper servers. 
- -### Data Protection and (Backup and) Recovery - -As a distributed database, data protection and recovery can benefit from different data protection strategies than a traditional single-server database. But multiple aspects of data protection and recovery should be considered: - -- Availability: As a distributed database Harper is intrinsically built for high-availability and a cluster will continue to run even with complete server(s) failure. This is the first and primary defense for protecting against any downtime or data loss. Harper provides fast horizontal scaling functionality with node cloning, which facilitates ease of establishing high availability clusters. -- [Audit log](administration/logging/audit-logging): Harper defaults to tracking data changes so malicious data changes can be found, attributed, and reverted. This provides security-level defense against data loss, allowing for fine-grained isolation and reversion of individual data without the large-scale reversion/loss of data associated with point-in-time recovery approaches. -- Snapshots: When used as a source-of-truth database for crucial data, we recommend using snapshot tools to regularly snapshot databases as a final backup/defense against data loss (this should only be used as a last resort in recovery). Harper has a [`get_backup`](./developers/operations-api/databases-and-tables#get-backup) operation, which provides direct support for making and retrieving database snapshots. An HTTP request can be used to get a snapshot. Alternatively, volume snapshot tools can be used to snapshot data at the OS/VM level. Harper can also provide scripts for replaying transaction logs from snapshots to facilitate point-in-time recovery when necessary (often customization may be preferred in certain recovery situations to minimize data loss). 
- -### Horizontal Scaling with Node Cloning - -Harper provides rapid horizontal scaling capabilities through [node cloning functionality described here](administration/cloning). - -### Monitoring - -Harper provides robust capabilities for analytics and observability to facilitate effective and informative monitoring: - -- Analytics provides statistics on usage, request counts, load, memory usage with historical tracking. The analytics data can be [accessed through querying](./reference/analytics). -- A large variety of real-time statistics about load, system information, database metrics, thread usage can be retrieved through the [`system_information` API](./developers/operations-api/system-operations). -- Information about the current cluster configuration and status can be found in the [cluster APIs](./developers/operations-api/clustering). -- Analytics and system information can easily be exported to Prometheus with our [Prometheus exporter component](https://github.com/HarperDB-Add-Ons/prometheus_exporter), making it easy to visualize and monitor Harper with Grafana. - -### Replication Transaction Logging - -Harper utilizes NATS for replication, which maintains a transaction log. See the [transaction log documentation for information on how to query this log](administration/logging/transaction-logging). diff --git a/versioned_docs/version-4.7/administration/cloning.md b/versioned_docs/version-4.7/administration/cloning.md deleted file mode 100644 index b3698092..00000000 --- a/versioned_docs/version-4.7/administration/cloning.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Clone Node ---- - -# Clone Node - -Clone node is a configurable node script that when pointed to another instance of Harper will create a clone of that -instance's config, databases and setup full replication. If it is run in a location where there is no existing Harper install, -it will, along with cloning, install Harper. 
If it is run in a location where there is another Harper instance, it will -only clone config, databases and replication that do not already exist. - -Clone node is triggered when Harper is installed or started with certain environment or command line (CLI) variables set (see below). - -**Leader node** - the instance of Harper you are cloning.\ -**Clone node** - the new node which will be a clone of the leader node. - -To start clone run `harperdb` in the CLI with either of the following variables set: - -#### Environment variables - -- `HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `HDB_LEADER_USERNAME` - The leader node admin username. -- `HDB_LEADER_PASSWORD` - The leader node admin password. -- `REPLICATION_HOSTNAME` - _(optional)_ The clones replication hostname. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -HDB_LEADER_URL=https://node-1.my-domain.com:9925 REPLICATION_HOSTNAME=node-2.my-domain.com HDB_LEADER_USERNAME=... HDB_LEADER_PASSWORD=... harperdb -``` - -#### Command line variables - -- `--HDB_LEADER_URL` - The URL of the leader node's operation API (usually port 9925). -- `--HDB_LEADER_USERNAME` - The leader node admin username. -- `--HDB_LEADER_PASSWORD` - The leader node admin password. -- `--REPLICATION_HOSTNAME` - _(optional)_ The clones clustering host. This value will be added to `replication.hostname` on the clone node. If this value is not set, replication will not be set up between the leader and clone. - -For example: - -``` -harperdb --HDB_LEADER_URL https://node-1.my-domain.com:9925 --REPLICATION_HOSTNAME node-2.my-domain.com --HDB_LEADER_USERNAME ... --HDB_LEADER_PASSWORD ... -``` - -Each time clone is run it will set a value `cloned: true` in `harperdb-config.yaml`. This value will prevent clone from -running again. 
If you want to run clone again set this value to `false`. If Harper is started with the clone variables -still present and `cloned` is true, Harper will just start as normal. - -Clone node does not require any additional configuration apart from the variables referenced above. -However, if you wish to set any configuration during clone this can be done by passing the config as environment/CLI -variables or cloning overtop of an existing `harperdb-config.yaml` file. - -More can be found in the Harper config documentation [here](../deployments/configuration). - -### Excluding database and components - -To set any specific (optional) clone config, including the exclusion of any database and/or replication, there is a file -called `clone-node-config.yaml` that can be used. - -The file must be located in the `ROOTPATH` directory of your clone (the `hdb` directory where you clone will be installed. -If the directory does not exist, create one and add the file to it). - -The config available in `clone-node-config.yaml` is: - -```yaml -databaseConfig: - excludeDatabases: - - database: null - excludeTables: - - database: null - table: null -componentConfig: - exclude: - - name: null -``` - -_Note: only include the configuration that you are using. If no clone config file is provided nothing will be excluded, -unless it already exists on the clone._ - -`databaseConfig` - Set any databases or tables that you wish to exclude from cloning. - -`componentConfig` - Set any components that you do not want cloned. Clone node will not clone the component code, -it will only clone the component reference that exists in the leader harperdb-config file. - -### Cloning configuration - -Clone node will not clone any configuration that is classed as unique to the leader node. 
This includes `replication.hostname`, `replication.url`,`clustering.nodeName`, -`rootPath` and any other path related values, for example `storage.path`, `logging.root`, `componentsRoot`, -any authentication certificate/key paths. - -### Cloning system database - -Harper uses a database called `system` to store operational information. Clone node will only clone the user and role -tables from this database. It will also set up replication on this table, which means that any existing and future user and roles -that are added will be replicated throughout the cluster. - -Cloning the user and role tables means that once clone node is complete, the clone will share the same login credentials with -the leader. - -### Replication - -If clone is run with the `REPLICATION_HOSTNAME` variable set, a fully replicating clone will be created. - -If any databases are excluded from the clone, replication will not be set up on these databases. - -### JWT Keys - -If cloning with replication, the leader's JWT private and public keys will be cloned. To disable this, include `CLONE_KEYS=false` in your clone variables. - -### Cloning overtop of an existing Harper instance - -Clone node will not overwrite any existing config, database or replication. It will write/clone any config database or replication -that does not exist on the node it is running on. - -An example of how this can be useful is if you want to set Harper config before the clone is created. To do this you -would create a harperdb-config.yaml file in your local `hdb` root directory with the config you wish to set. Then -when clone is run it will append the missing config to the file and install Harper with the desired config. - -Another useful example could be retroactively adding another database to an existing instance. Running clone on -an existing instance could create a full clone of another database and set up replication between the database on the -leader and the clone. 
- -### Cloning steps - -Clone node will execute the following steps when ran: - -1. Look for an existing Harper install. It does this by using the default (or user provided) `ROOTPATH`. -1. If an existing instance is found it will check for a `harperdb-config.yaml` file and search for the `cloned` value. If the value exists and is `true` clone will skip the clone logic and start Harper. -1. Clone harperdb-config.yaml values that don't already exist (excluding values unique to the leader node). -1. Fully clone any databases that don't already exist. -1. If classed as a "fresh clone", install Harper. An instance is classed as a fresh clone if there is no system database. -1. If `REPLICATION_HOSTNAME` is set, set up replication between the leader and clone. -1. Clone is complete, start Harper. - -### Cloning with Docker - -To run clone inside a container add the environment variables to your run command. - -For example: - -``` -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_LEADER_PASSWORD=password \ - -e HDB_LEADER_USERNAME=admin \ - -e HDB_LEADER_URL=https://1.123.45.6:9925 \ - -e REPLICATION_HOSTNAME=1.123.45.7 \ - -p 9925:9925 \ - -p 9926:9926 \ - harperdb/harperdb -``` - -Clone will only run once, when you first start the container. If the container restarts the environment variables will be ignored. diff --git a/versioned_docs/version-4.7/administration/compact.md b/versioned_docs/version-4.7/administration/compact.md deleted file mode 100644 index 1a71db14..00000000 --- a/versioned_docs/version-4.7/administration/compact.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Compact ---- - -# Compact - -Database files can grow quickly as you use them, sometimes impeding performance. Harper has multiple compact features that can be used to reduce database file size and potentially improve performance. The compact process does not compress your data, it instead makes your database file smaller by eliminating free-space and fragmentation. 
- -There are two options that Harper offers for compacting a Database. - -_Note: Some of the storage configuration (such as compression) cannot be updated on existing databases, this is where the following options are useful. They will create a new compressed copy of the database with any updated configuration._ - -More information on the storage configuration options can be [found here](../deployments/configuration#storage) - -### Copy compaction - -It is recommended that, to prevent any record loss, Harper is not running when performing this operation. - -This will copy a Harper database with compaction. If you wish to use this new database in place of the original, you will need to move/rename it to the path of the original database. - -This command should be run in the [CLI](../deployments/harper-cli) - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - -### Compact on start - -Compact on start is a more automated option that will compact **all** databases when Harper is started. Harper will not start until compact is complete. Under the hood it loops through all non-system databases, creates a backup of each one and calls copy-db. After the copy/compaction is complete it will move the new database to where the original one is located and remove any backups. 
- -Compact on start is initiated by config in `harperdb-config.yaml` - -_Note: Compact on start will switch `compactOnStart` to `false` after it has run_ - -`compactOnStart` - _Type_: boolean; _Default_: false - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -Using CLI variables - -```bash ---STORAGE_COMPACTONSTART true --STORAGE_COMPACTONSTARTKEEPBACKUP true -``` - -```bash -STORAGE_COMPACTONSTART=true -STORAGE_COMPACTONSTARTKEEPBACKUP=true -``` diff --git a/versioned_docs/version-4.7/administration/harper-studio/create-account.md b/versioned_docs/version-4.7/administration/harper-studio/create-account.md deleted file mode 100644 index e1ffbb87..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/create-account.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Create a Studio Account ---- - -# Create a Studio Account - -Start at the [Harper Studio sign up page](https://fabric.harper.fast/#/sign-up). - -1. Provide the following information: - - First Name - - Last Name - - Email Address - - Subdomain - - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ - - - Coupon Code (optional) - -1. Review the Privacy Policy and Terms of Service. -1. Click the sign up for free button. -1. You will be taken to a new screen to add an account password. Enter your password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the add account password button. - -You will receive a Studio welcome email confirming your registration. - -Note: Your email address will be used as your username and cannot be changed. 
diff --git a/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md b/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md deleted file mode 100644 index 2530fef0..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/enable-mixed-content.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Enable Mixed Content ---- - -# Enable Mixed Content - -If you want to connect insecure HTTP instances from the secure HTTPS Fabric Studio, you can enable mixed content temporarily. This isn't recommended in production systems. It would be better to add HTTPS / SSL Termination in front of your instances. But if you understand the risks, you can enable mixed content. Enabling mixed content is required in cases where you would like to connect the Harper Studio to Harper Instances via HTTP. This should not be used for production systems, but may be convenient for development and testing purposes. Doing so will allow your browser to reach HTTP traffic, which is considered insecure, through an HTTPS site like the Studio. - -A comprehensive guide is provided by Adobe [here](https://experienceleague.adobe.com/docs/target/using/experiences/vec/troubleshoot-composer/mixed-content.html). diff --git a/versioned_docs/version-4.7/administration/harper-studio/index.md b/versioned_docs/version-4.7/administration/harper-studio/index.md deleted file mode 100644 index 7d7192fe..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/index.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: Harper Studio ---- - -# Harper Studio - -Harper Studio is the web-based GUI for Harper. Studio enables you to administer, navigate, and monitor all of your Harper instances in a simple, user-friendly interface without any knowledge of the underlying Harper API. It’s free to sign up, get started today! 
- -[Sign up for free!](https://studio.harperdb.io/sign-up) - -Harper now includes a simplified local Studio that is packaged with all Harper installations and served directly from the instance. It can be enabled in the [configuration file](../deployments/configuration#localstudio). This section is dedicated to the hosted Studio accessed at [studio.harperdb.io](https://studio.harperdb.io). - ---- - -## How does Studio Work? - -While Harper Studio is web based and hosted by us, all database interactions are performed on the Harper instance the studio is connected to. The Harper Studio loads in your browser, at which point you login to your Harper instances. Credentials are stored in your browser cache and are not transmitted back to Harper. All database interactions are made via the Harper Operations API directly from your browser to your instance. - -## What type of instances can I manage? - -Harper Studio enables users to manage both Harper Cloud instances and privately hosted instances all from a single UI. All Harper instances feature identical behavior whether they are hosted by us or by you. diff --git a/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md b/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md deleted file mode 100644 index 06a6eb89..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/instance-configuration.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -title: Instance Configuration ---- - -# Instance Configuration - -Harper instance configuration can be viewed and managed directly through the Harper Studio. Harper Cloud instances can be resized in two different ways via this page, either by modifying machine RAM or by increasing drive storage. Enterprise instances can have their licenses modified by modifying licensed RAM. - -All instance configuration is handled through the **config** page of the Harper Studio, accessed with the following instructions: - -1. 
Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click config in the instance control bar. - -_Note, the **config** page will only be available to super users and certain items are restricted to Studio organization owners._ - -## Instance Overview - -The **instance overview** panel displays the following instance specifications: - -- Instance URL - -- Applications URL - -- Instance Node Name (for clustering) - -- Instance API Auth Header (this user) - - _The Basic authentication header used for the logged in Harper database user_ - -- Created Date (Harper Cloud only) - -- Region (Harper Cloud only) - - _The geographic region where the instance is hosted._ - -- Total Price - -- RAM - -- Storage (Harper Cloud only) - -- Disk IOPS (Harper Cloud only) - -## Update Instance RAM - -Harper Cloud instance size and Enterprise instance licenses can be modified with the following instructions. This option is only available to Studio organization owners. - -Note: For Harper Cloud instances, upgrading RAM may add additional CPUs to your instance as well. Click here to see how many CPUs are provisioned for each instance size. - -1. In the **update ram** panel at the bottom left: - - Select the new instance size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning/relicensing itself. The instance will not be available during this time. 
You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. - -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if Harper Cloud instance reprovisioning takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Update Instance Storage - -The Harper Cloud instance storage size can be increased with the following instructions. This option is only available to Studio organization owners. - -Note: Instance storage can only be upgraded once every 6 hours. - -1. In the **update storage** panel at the bottom left: - - Select the new instance storage size. - - If you do not have a credit card associated with your account, an **Add Credit Card To Account** button will appear. Click that to be taken to the billing screen where you can enter your credit card information before returning to the **config** tab to proceed with the upgrade. - - If you do have a credit card associated, you will be presented with the updated billing information. - - Click **Upgrade**. - -1. The instance will shut down and begin reprovisioning itself. The instance will not be available during this time. You will be returned to the instance dashboard and the instance status will show UPDATING INSTANCE. -1. Once your instance upgrade is complete, it will appear on the instance dashboard as status OK with your newly selected instance size. - -_Note, if this process takes longer than 20 minutes, please submit a support ticket here: [https://harperdbhelp.zendesk.com/hc/en-us/requests/new](https://harperdbhelp.zendesk.com/hc/en-us/requests/new)._ - -## Remove Instance - -The Harper instance can be deleted/removed from the Studio with the following instructions. Once this operation is started it cannot be undone. 
This option is only available to Studio organization owners. - -1. In the **remove instance** panel at the bottom left: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Remove**. - -1. The instance will begin deleting immediately. - -## Restart Instance - -The Harper Cloud instance can be restarted with the following instructions. - -1. In the **restart instance** panel at the bottom right: - - Enter the instance name in the text box. - - The Studio will present you with a warning. - - Click **Restart**. - -1. The instance will begin restarting immediately. - -## Instance Config (Read Only) - -A JSON preview of the instance config is available for reference at the bottom of the page. This is a read only visual and is not editable via the Studio. To make changes to the instance config, review the [configuration file documentation](../../deployments/configuration#using-the-configuration-file-and-naming-conventions). diff --git a/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md b/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md deleted file mode 100644 index e9b48939..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/instance-metrics.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Instance Metrics ---- - -# Instance Metrics - -The Harper Studio display instance status and metrics on the instance status page, which can be accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **status** in the instance control bar. - -Once on the instance browse page you can view host system information, [Harper logs](../logging/standard-logging), and Harper Cloud alarms (if it is a cloud instance). 
- -_Note, the **status** page will only be available to super users._ diff --git a/versioned_docs/version-4.7/administration/harper-studio/instances.md b/versioned_docs/version-4.7/administration/harper-studio/instances.md deleted file mode 100644 index b367ed96..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/instances.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Instances ---- - -# Instances - -The Harper Studio allows you to administer all of your HarperDinstances in one place. Harper currently offers the following instance types: - -- **Harper Cloud Instance** Managed installations of Harper, what we call [Harper Cloud](../../deployments/harper-cloud/). -- **5G Wavelength Instance** Managed installations of Harper running on the Verizon network through AWS Wavelength, what we call 5G Wavelength Instances. _Note, these instances are only accessible via the Verizon network._ -- **Enterprise Instance** Any Harper installation that is managed by you. These include instances hosted within your cloud provider accounts (for example, from the AWS or Digital Ocean Marketplaces), privately hosted instances, or instances installed locally. - -All interactions between the Studio and your instances take place directly from your browser. Harper stores metadata about your instances, which enables the Studio to display these instances when you log in. Beyond that, all traffic is routed from your browser to the Harper instances using the standard [Harper API](../../developers/operations-api/). - -## Organization Instance List - -A summary view of all instances within an organization can be viewed by clicking on the appropriate organization from the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. Each instance gets their own card. Harper Cloud and Enterprise instances are listed together. - -## Create a New Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. 
Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select your desired Instance Type. -1. For a Harper Cloud Instance or a Harper 5G Wavelength Instance, click **Create Harper Cloud Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This will be used to build your instance URL. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com). The Instance URL will be previewed below._ - - 1. Enter Instance Username - - _This is the username of the initial Harper instance super user._ - - 1. Enter Instance Password - - _This is the password of the initial Harper instance super user._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper Cloud Instances are billed based on Instance RAM, this will select the size of your provisioned instance._ _More on instance specs\_\_._ - - 1. Select Storage Size - - _Each instance has a mounted storage volume where your Harper data will reside. Storage is provisioned based on space and IOPS._ _More on IOPS Impact on Performance\_\_._ - - 1. Select Instance Region - - _The geographic area where your instance will be provisioned._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. Your Harper Cloud instance will be provisioned in the background. Provisioning typically takes 5-15 minutes. You will receive an email notification when your instance is ready. 
- -## Register Enterprise Instance - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization for the instance to be created under. -1. Click the **Create New Harper Cloud Instance + Register Enterprise Instance** card. -1. Select **Register Enterprise Instance**. - 1. Fill out Instance Info. - 1. Enter Instance Name - - _This is used for descriptive purposes only._ - - 1. Enter Instance Username - - _The username of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Instance Password - - _The password of a Harper super user that is already configured in your Harper installation._ - - 1. Enter Host - - _The host to access the Harper instance. For example, `harperdb.myhost.com` or `localhost`._ - - 1. Enter Port - - _The port to access the Harper instance. Harper defaults `9925` for HTTP and `31283` for HTTPS._ - - 1. Select SSL - - _If your instance is running over SSL, select the SSL checkbox. If not, you will need to enable mixed content in your browser to allow the HTTPS Studio to access the HTTP instance. If there are issues connecting to the instance, the Studio will display a red error message._ - - 1. Click **Instance Details** to move to the next page. - 1. Select Instance Specs - 1. Select Instance RAM - - _Harper instances are billed based on Instance RAM. Selecting additional RAM will enable the ability for faster and more complex queries._ - - 1. Click **Confirm Instance Details** to move to the next page. - 1. Review your Instance Details, if there is an error, use the back button to correct it. - 1. Review the [Privacy Policy](https://harperdb.io/legal/privacy-policy/) and [Terms of Service](https://harperdb.io/legal/harperdb-cloud-terms-of-service/), if you agree, click the **I agree** radio button to confirm. - 1. Click **Add Instance**. - 1. 
The Harper Studio will register your instance and restart it for the registration to take effect. Your instance will be immediately available after this is complete. - -## Delete an Instance - -Instance deletion has two different behaviors depending on the instance type. - -- **Harper Cloud Instance** This instance will be permanently deleted, including all data. This process is irreversible and cannot be undone. -- **Enterprise Instance** The instance will be removed from the Harper Studio only. This does not uninstall Harper from your system and your data will remain intact. - -An instance can be deleted as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the trash can icon. -1. Enter the instance name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an instance._ - -1. Click the **Do It** button. - -## Upgrade an Instance - -Harper instances can be resized on the [Instance Configuration](instance-configuration) page. - -## Instance Log In/Log Out - -The Studio enables users to log in and out of different database users from the instance control panel. To log out of an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card and click the lock icon. -1. You will immediately be logged out of the instance. - -To log in to an instance: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Identify the proper instance card, it will have an unlocked icon and a status reading PLEASE LOG IN, and click the center of the card. -1. Enter the database username. 
- - _The username of a Harper user that is already configured in your Harper instance._ - -1. Enter the database password. - - _The password of a Harper user that is already configured in your Harper instance._ - -1. Click **Log In**. diff --git a/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md b/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md deleted file mode 100644 index 199d38ce..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/login-password-reset.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Login and Password Reset ---- - -# Login and Password Reset - -## Log In to Your Harper Studio Account - -To log into your existing Harper Studio account: - -1. Navigate to the [Harper Studio](https://studio.harperdb.io/). -1. Enter your email address. -1. Enter your password. -1. Click **sign in**. - -## Reset a Forgotten Password - -To reset a forgotten password: - -1. Navigate to the Harper Studio password reset page. -1. Enter your email address. -1. Click **send password reset email**. -1. If the account exists, you will receive an email with a temporary password. -1. Navigate back to the Harper Studio login page. -1. Enter your email address. -1. Enter your temporary password. -1. Click **sign in**. -1. You will be taken to a new screen to reset your account password. Enter your new password. - _Passwords must be a minimum of 8 characters with at least 1 lower case character, 1 upper case character, 1 number, and 1 special character._ -1. Click the **add account password** button. - -## Change Your Password - -If you are already logged into the Studio, you can change your password though the user interface. - -1. Navigate to the Harper Studio profile page. -1. In the **password** section, enter: - - Current password. - - New password. - - New password again _(for verification)_. - -1. Click the **Update Password** button. 
diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md b/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md deleted file mode 100644 index 52e8cc64..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/manage-applications.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Manage Applications ---- - -# Manage Applications - -[Harper Applications](../../developers/applications/) are enabled by default and can be configured further through the Harper Studio. It is recommended to read through the [Applications](../../developers/applications/) documentation first to gain a strong understanding of Harper Applications behavior. - -All Applications configuration and development is handled through the **applications** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **applications** in the instance control bar. - -_Note, the **applications** page will only be available to super users._ - -## Manage Applications - -The Applications editor is not required for development and deployment, though it is a useful tool to maintain and manage your Harper Applications. The editor provides the ability to create new applications or import/deploy remote application packages. - -The left bar is the applications file navigator, allowing you to select files to edit and add/remove files and folders. By default, this view is empty because there are no existing applications. To get started, either create a new application or import/deploy a remote application. - -The right side of the screen is the file editor. Here you can make edit individual files of your application directly in the Harper Studio. 
- -## Things to Keep in Mind - -To learn more about developing Harper Applications, make sure to read through the [Applications](../../developers/applications/) documentation. - -When working with Applications in the Harper Studio, by default the editor will restart the Harper Applications server every time a file is saved. Note, this behavior can be turned off by toggling the `auto` toggle at the top right of the applications page. If you are constantly editing your application, it may result in errors causing the application not to run. These errors will not be visible on the application page, however they will be available in the Harper logs, which can be found on the [status page](instance-metrics). - -The Applications editor stores unsaved changes in cache. This means that occasionally your editor will show a discrepancy from the code that is stored and running on your Harper instance. You can identify if the code in your Studio differs if the "save" and "revert" buttons are active. To revert the cached version in your editor to the version of the file stored on your Harper instance click the "revert" button. - -## Accessing Your Application Endpoints - -Accessing your application endpoints varies with which type of endpoint you're creating. All endpoints, regardless of type, will be accessed via the [Harper HTTP port found in the Harper configuration file](../../deployments/configuration#http). The default port is `9926`, but you can verify what your instances is set to by navigating to the [instance config page](instance-configuration) and examining the read only JSON version of your instance's config file looking specifically for either the `http: port: 9926` or `http: securePort: 9926` configs. If `port` is set, you will access your endpoints via `http` and if `securePort` is set, you will access your endpoints via `https`. - -Below is a breakdown of how to access each type of endpoint. 
In these examples, we will use a locally hosted instance with `securePort` set to `9926`: `https://localhost:9926`. - -- **Standard REST Endpoints**\ - Standard REST endpoints are defined via the `@export` directive to tables in your schema definition. You can read more about these in the [Adding an Endpoint section of the Applications documentation](../../developers/applications/#adding-an-endpoint). Here, if we are looking to access a record with ID `1` from table `Dog` on our instance, [per the REST documentation](../../developers/rest), we could send a `GET` (or since this is a GET, we could post the URL in our browser) to `https://localhost:9926/Dog/1`. -- **Augmented REST Endpoints**\ - Harper Applications enable you to write [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) for your resources. Accessing these endpoints is identical to accessing the standard REST endpoints above, though you may have defined custom behavior in each function. Taking the example from the [Applications documentation](../../developers/applications/#custom-functionality-with-javascript), if we are looking to access the `DogWithHumanAge` example, we could send the GET to `https://localhost:9926/DogWithHumanAge/1`. -- **Fastify Routes**\ - If you need more functionality than the REST applications can provide, you can define your own custom endpoints using [Fastify Routes](../../developers/applications/#define-fastify-routes). The paths to these routes are defined via the application `config.yaml` file. You can read more about how you can customize the configuration options in the [Define Fastify Routes documentation](../../developers/applications/define-routes). By default, routes are accessed via the following pattern: `[Instance URL]:[HTTP Port]/[Project Name]/[Route URL]`. 
Using the example from the [Harper Application Template](https://github.com/HarperDB/application-template/), where we've named our project `application-template`, we would access the `getAll` route at `https://localhost:9926/application-template/getAll`.
Click the plus icon at the top right of the databases section. -1. Enter the database name. -1. Click the green check mark. - -#### Delete a Database - -Deleting a database is permanent and irreversible. Deleting a database removes all tables and data within it. - -1. Click the minus icon at the top right of the databases section. -1. Identify the appropriate database to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -#### Create a Table - -1. Select the desired database from the databases section. -1. Click the plus icon at the top right of the tables section. -1. Enter the table name. -1. Enter the primary key. - - _The primary key is also often referred to as the hash attribute in the studio, and it defines the unique identifier for each row in your table._ - -1. Click the green check mark. - -#### Delete a Table - -Deleting a table is permanent and irreversible. Deleting a table removes all data within it. - -1. Select the desired database from the databases section. -1. Click the minus icon at the top right of the tables section. -1. Identify the appropriate table to delete and click the red minus sign in the same row. -1. Click the red check mark to confirm deletion. - -## Manage Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Filter Table Data - -1. Click the magnifying glass icon at the top right of the table browser. -1. This expands the search filters. -1. The results will be filtered appropriately. - -#### Load CSV Data - -1. Click the data icon at the top right of the table browser. You will be directed to the CSV upload page where you can choose to import a CSV by URL or upload a CSV file. -1. To import a CSV by URL: - 1. Enter the URL in the **CSV file URL** textbox. - 1. Click **Import From URL**. - 1. The CSV will load, and you will be redirected back to browse table data. -1. To upload a CSV file: - 1. 
Click **Click or Drag to select a .csv file** (or drag your CSV file from your file browser). - 1. Navigate to your desired CSV file and select it. - 1. Click **Insert X Records**, where X is the number of records in your CSV. - 1. The CSV will load, and you will be redirected back to browse table data. - -#### Add a Record - -1. Click the plus icon at the top right of the table browser. -1. The Studio will pre-populate existing table attributes in JSON format. - - _The primary key is not included, but you can add it in and set it to your desired value. Auto-maintained fields are not included and cannot be manually set. You may enter a JSON array to insert multiple records in a single transaction._ - -1. Enter values to be added to the record. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **Add New** button. - -#### Edit a Record - -1. Click the record/row you would like to edit. -1. Modify the desired values. - - _You may add new attributes to the JSON; they will be reflexively added to the table._ - -1. Click the **save icon**. - -#### Delete a Record - -Deleting a record is permanent and irreversible. If transaction logging is turned on, the delete transaction will be recorded as well as the data that was deleted. - -1. Click the record/row you would like to delete. -1. Click the **delete icon**. -1. Confirm deletion by clicking the **check icon**. - -## Browse Table Data - -The following section assumes you have selected the appropriate table from the database/table browser. - -#### Browse Table Data - -The first page of table data is automatically loaded on table selection. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Table Data - -Click the refresh icon at the top right of the table browser. 
- -#### Automatically Refresh Table Data - -Toggle the auto switch at the top right of the table browser. The table data will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md deleted file mode 100644 index 3662013c..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-roles.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: Manage Instance Roles ---- - -# Manage Instance Roles - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance role configuration is handled through the **roles** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the Harper Studio Organizations page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **roles** in the instance control bar. - -_Note, the **roles** page will only be available to super users._ - -The _roles management_ screen consists of the following panels: - -- **super users** - - Displays all super user roles for this instance. - -- **cluster users** - - Displays all cluster user roles for this instance. - -- **standard roles** - - Displays all standard roles for this instance. - -- **role permission editing** - - Once a role is selected for editing, permissions will be displayed here in JSON format. - -_Note, when new tables are added that are not configured, the Studio will generate configuration values with permissions defaulting to `false`._ - -## Role Management - -#### Create a Role - -1. Click the plus icon at the top right of the appropriate role section. - -1. 
Enter the role name. - -1. Click the green check mark. - -1. Optionally toggle the **manage databases/tables** switch to specify the `structure_user` config. - -1. Configure the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Modify a Role - -1. Click the appropriate role from the appropriate role section. - -1. Modify the role permissions in the role permission editing panel. - - _Note, to have the Studio generate attribute permissions JSON, toggle **show all attributes** at the top right of the role permission editing panel._ - -1. Click **Update Role Permissions**. - -#### Delete a Role - -Deleting a role is permanent and irreversible. A role cannot be removed if users are associated with it. - -1. Click the minus icon at the top right of the roles section. - -1. Identify the appropriate role to delete and click the red minus sign in the same row. - -1. Click the red check mark to confirm deletion. diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md b/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md deleted file mode 100644 index fb91fbbb..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/manage-instance-users.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Manage Instance Users ---- - -# Manage Instance Users - -Harper users and roles can be managed directly through the Harper Studio. It is recommended to read through the [users & roles documentation](../../developers/security/users-and-roles) to gain a strong understanding of how they operate. - -Instance user configuration is handled through the **users** page of the Harper Studio, accessed with the following instructions: - -1. 
Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **users** in the instance control bar. - -_Note, the **users** page will only be available to super users._ - -## Add a User - -Harper instance users can be added with the following instructions. - -1. In the **add user** panel on the left enter: - - New user username. - - New user password. - - Select a role. - - _Learn more about role management here: [Manage Instance Roles](manage-instance-roles)._ - -1. Click **Add User**. - -## Edit a User - -Harper instance users can be modified with the following instructions. - -1. In the **existing users** panel, click the row of the user you would like to edit. - -1. To change a user’s password: - 1. In the **Change user password** section, enter the new password. - 1. Click **Update Password**. - -1. To change a user’s role: - 1. In the **Change user role** section, select the new role. - 1. Click **Update Role**. - -1. To delete a user: - 1. In the **Delete User** section, type the username into the textbox. - - _This is done for confirmation purposes._ - - 1. Click **Delete User**. diff --git a/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md b/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md deleted file mode 100644 index b3d357f7..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/manage-replication.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Manage Replication ---- - -# Manage Replication - -Harper instance clustering and replication can be configured directly through the Harper Studio. It is recommended to read through the [clustering documentation](../../reference/clustering) first to gain a strong understanding of Harper clustering behavior. 
- -All clustering configuration is handled through the **replication** page of the Harper Studio, accessed with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. - -1. Click the appropriate organization that the instance belongs to. - -1. Select your desired instance. - -1. Click **replication** in the instance control bar. - -Note, the **replication** page will only be available to super users. - ---- - -## Initial Configuration - -Harper instances do not have clustering configured by default. The Harper Studio will walk you through the initial configuration. Upon entering the **replication** screen for the first time you will need to complete the following configuration. Configurations are set in the **enable clustering** panel on the left while actions are described in the middle of the screen. It is worth reviewing the [Creating a Cluster User](../../reference/clustering/creating-a-cluster-user) document before proceeding. - -1. Enter Cluster User username. (Defaults to `cluster_user`). -1. Enter Cluster Password. -1. Review and/or Set Cluster Node Name. -1. Click **Enable Clustering**. - -At this point the Studio will restart your Harper Instance, required for the configuration changes to take effect. - ---- - -## Manage Clustering - -Once initial clustering configuration is completed you are presented with a clustering management screen with the following properties: - -- **connected instances** - - Displays all instances within the Studio Organization that this instance manages a connection with. - -- **unconnected instances** - - Displays all instances within the Studio Organization that this instance does not manage a connection with. - -- **unregistered instances** - - Displays all instances outside the Studio Organization that this instance manages a connection with. 
- -- **manage clustering** - - Once instances are connected, this will display clustering management options for all connected instances and all databases and tables. - ---- - -## Connect an Instance - -Harper Instances can be clustered together with the following instructions. - -1. Ensure clustering has been configured on both instances and a cluster user with identical credentials exists on both. - -1. Identify the instance you would like to connect from the **unconnected instances** panel. - -1. Click the plus icon next to the appropriate instance. - -1. If configurations are correct, all databases will sync across the cluster, then appear in the **manage clustering** panel. If there is a configuration issue, a red exclamation icon will appear, click it to learn more about what could be causing the issue. - ---- - -## Disconnect an Instance - -Harper Instances can be disconnected with the following instructions. - -1. Identify the instance you would like to disconnect from the **connected instances** panel. - -1. Click the minus icon next to the appropriate instance. - ---- - -## Manage Replication - -Subscriptions must be configured in order to move data between connected instances. Read more about subscriptions here: Creating A Subscription. The **manage clustering** panel displays a table with each row representing a channel per instance. Cells are bolded to indicate a change in the column. Publish and subscribe replication can be configured per table with the following instructions: - -1. Identify the instance, database, and table for replication to be configured. - -1. For publish, click the toggle switch in the **publish** column. - -1. For subscribe, click the toggle switch in the **subscribe** column. 
diff --git a/versioned_docs/version-4.7/administration/harper-studio/organizations.md b/versioned_docs/version-4.7/administration/harper-studio/organizations.md deleted file mode 100644 index f93eeff0..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/organizations.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: Organizations ---- - -# Organizations - -Harper Studio organizations provide the ability to group Harper Cloud Instances. Organization behavior is as follows: - -- Billing occurs at the organization level to a single credit card. -- Organizations retain their own unique Harper Cloud subdomain. -- Cloud instances reside within an organization. -- Studio users can be invited to organizations to share instances. - -An organization is automatically created for you when you sign up for Harper Studio. If you only have one organization, the Studio will automatically bring you to your organization’s page. - ---- - -## List Organizations - -A summary view of all organizations your user belongs to can be viewed on the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. You can navigate to this page at any time by clicking the **all organizations** link at the top of the Harper Studio. - -## Create a New Organization - -A new organization can be created as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the **Create a New Organization** card. -1. Fill out new organization details - - Enter Organization Name - _This is used for descriptive purposes only._ - - Enter Organization Subdomain - _Part of the URL that will be used to identify your Harper Cloud Instances. For example, with subdomain "demo" and instance name "c1" the instance URL would be: [https://c1-demo.harperdbcloud.com](https://c1-demo.harperdbcloud.com)._ -1. Click Create Organization. 
- -## Delete an Organization - -An organization cannot be deleted until all instances have been removed. An organization can be deleted as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Identify the proper organization card and click the trash can icon. -1. Enter the organization name into the text box. - - _This is done for confirmation purposes to ensure you do not accidentally delete an organization._ - -1. Click the **Do It** button. - -## Manage Users - -Harper Studio organization owners can manage users including inviting new users, removing users, and toggling ownership. - -#### Inviting a User - -A new user can be invited to an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. In the **add user** box, enter the new user’s email address. -1. Click **Add User**. - -Users may or may not already be Harper Studio users when adding them to an organization. If the Harper Studio account already exists, the user will receive an email notification alerting them to the organization invitation. If the user does not have a Harper Studio account, they will receive an email welcoming them to Harper Studio. - ---- - -#### Toggle a User’s Organization Owner Status - -Organization owners have full access to the organization including the ability to manage organization users, create, modify, and delete instances, and delete the organization. Users must have accepted their invitation prior to being promoted to an owner. A user’s organization owner status can be toggled owner as follows: - -1. Navigate to the Harper Studio Organizations page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Toggle the **Is Owner** switch to the desired status. 
- ---- - -#### Remove a User from an Organization - -Users may be removed from an organization at any time. Removing a user from an organization will not delete their Harper Studio account, it will only remove their access to the specified organization. A user can be removed from an organization as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **users** at the top of the screen. -1. Click the appropriate user from the **existing users** section. -1. Type **DELETE** in the text box in the **Delete User** row. - - _This is done for confirmation purposes to ensure you do not accidentally delete a user._ - -1. Click **Delete User**. - -## Manage Billing - -Billing is configured per organization and will be billed to the stored credit card at appropriate intervals (monthly or annually depending on the registered instance). Billing settings can be configured as follows: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/?redirect=/organizations) page. -1. Click the appropriate organization card. -1. Click **billing** at the top of the screen. - -Here organization owners can view invoices, manage coupons, and manage the associated credit card. - -_Harper billing and payments are managed via Stripe._ - -### Add a Coupon - -Coupons are applicable towards any paid tier or enterprise instance and you can change your subscription at any time. Coupons can be added to your Organization as follows: - -1. In the coupons panel of the **billing** page, enter your coupon code. -1. Click **Add Coupon**. -1. The coupon will then be available and displayed in the coupons panel. 
diff --git a/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md b/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md deleted file mode 100644 index e85f5e15..00000000 --- a/versioned_docs/version-4.7/administration/harper-studio/query-instance-data.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Query Instance Data ---- - -# Query Instance Data - -SQL queries can be executed directly through the Harper Studio with the following instructions: - -1. Navigate to the [Harper Studio Organizations](https://studio.harperdb.io/organizations) page. -1. Click the appropriate organization that the instance belongs to. -1. Select your desired instance. -1. Click **query** in the instance control bar. -1. Enter your SQL query in the SQL query window. -1. Click **Execute**. - -_Please note, the Studio will execute the query exactly as entered. For example, if you attempt to `SELECT *` from a table with millions of rows, you will most likely crash your browser._ - -## Browse Query Results Set - -#### Browse Results Set Data - -The first page of results set data is automatically loaded on query execution. Paging controls are at the bottom of the table. Here you can: - -- Page left and right using the arrows. -- Type in the desired page. -- Change the page size (the amount of records displayed in the table). - -#### Refresh Results Set - -Click the refresh icon at the top right of the results set table. - -#### Automatically Refresh Results Set - -Toggle the auto switch at the top right of the results set table. The results set will now automatically refresh every 15 seconds. Filters and pages will remain set for refreshed data. - -## Query History - -Query history is stored in your local browser cache. Executed queries are listed with the most recent at the top in the **query history** section. - -#### Rerun Previous Query - -- Identify the query from the **query history** list. -- Click the appropriate query. 
It will be loaded into the **sql query** input box. -- Click **Execute**. - -#### Clear Query History - -Click the trash can icon at the top right of the **query history** section. - -## Create Charts - -The Harper Studio includes a charting feature where you can build charts based on your specified queries. Visit the Charts documentation for more information. diff --git a/versioned_docs/version-4.7/administration/jobs.md b/versioned_docs/version-4.7/administration/jobs.md deleted file mode 100644 index c487f424..00000000 --- a/versioned_docs/version-4.7/administration/jobs.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -Harper Jobs are asynchronous tasks performed by the Operations API. - -## Job Summary - -Jobs uses an asynchronous methodology to account for the potential of a long-running operation. For example, exporting millions of records to S3 could take some time, so that job is started and the id is provided to check on the status. - -The job status can be **COMPLETE** or **IN_PROGRESS**. - -## Example Job Operations - -Example job operations include: - -[csv data load](../developers/operations-api/bulk-operations#csv-data-load) - -[csv file load](../developers/operations-api/bulk-operations#csv-file-load) - -[csv url load](../developers/operations-api/bulk-operations#csv-url-load) - -[import from s3](../developers/operations-api/bulk-operations#import-from-s3) - -[delete_records_before](../developers/operations-api/bulk-operations#delete-records-before) - -[export_local](../developers/operations-api/bulk-operations#export-local) - -[export_to_s3](../developers/operations-api/bulk-operations#export-to-s3) - -Example Response from a Job Operation - -``` -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - -Whenever one of these operations is initiated, an asynchronous job is created and the request contains the ID of that job which can be used to check on its status. 
- -## Managing Jobs - -To check on a job's status, use the [get_job](../developers/operations-api/jobs#get-job) operation. - -Get Job Request - -``` -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -Get Job Response - -``` -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - -## Finding Jobs - -To find jobs (if the ID is not known) use the [search_jobs_by_start_date](../developers/operations-api/jobs#search-jobs-by-start-date) operation. - -Search Jobs Request - -``` -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -Search Jobs Response - -``` -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.7/administration/logging/audit-logging.md b/versioned_docs/version-4.7/administration/logging/audit-logging.md deleted file mode 100644 index 209b4981..00000000 --- a/versioned_docs/version-4.7/administration/logging/audit-logging.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: Audit Logging ---- - -# Audit Logging 
- -### Audit log - -The audit log uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. - -Audit log is enabled by default. To disable the audit log, set `logging.auditLog` to false in the config file, `harperdb-config.yaml`. Then restart Harper for those changes to take place. Note, the audit is required to be enabled for real-time messaging. - -### Audit Log Operations - -#### read_audit_log - -The `read_audit_log` operation is flexible, enabling users to query with many parameters. All operations search on a single table. Filter options include timestamps, usernames, and table hash values. Additional examples found in the [Harper API documentation](../../developers/operations-api/logs). - -**Search by Timestamp** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558] -} -``` - -There are three outcomes using timestamp. - -- `"search_values": []` - All records returned for specified table -- `"search_values": [1660585740558]` - All records after provided timestamp -- `"search_values": [1660585740558, 1760585759710]` - Records "from" and "to" provided timestamp - ---- - -**Search by Username** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -The above example will return all records whose `username` is "admin." - ---- - -**Search by Primary Key** - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -The above example will return all records whose primary key (`hash_value`) is 318. - ---- - -#### read_audit_log Response - -The example that follows provides records of operations performed on a table. 
One thing of note is that the `read_audit_log` operation gives you the `original_records`. - -```json -{ - "operation": "update", - "user_name": "HDB_ADMIN", - "timestamp": 1607035559122.277, - "hash_values": [1, 2], - "records": [ - { - "id": 1, - "breed": "Muttzilla", - "age": 6, - "__updatedtime__": 1607035559122 - }, - { - "id": 2, - "age": 7, - "__updatedtime__": 1607035559121 - } - ], - "original_records": [ - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 2, - "name": "Penny" - }, - { - "__createdtime__": 1607035556801, - "__updatedtime__": 1607035556801, - "age": 5, - "breed": "Mutt", - "id": 1, - "name": "Harper" - } - ] -} -``` - -#### delete_audit_logs_before - -Just like with transaction logs, you can clean up your audit logs with the `delete_audit_logs_before` operation. It will delete audit log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "cat", - "timestamp": 1598290282817 -} -``` diff --git a/versioned_docs/version-4.7/administration/logging/index.md b/versioned_docs/version-4.7/administration/logging/index.md deleted file mode 100644 index bde1870a..00000000 --- a/versioned_docs/version-4.7/administration/logging/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Logging ---- - -# Logging - -Harper provides many different logging options for various features and functionality. - -- [Standard Logging](logging/standard-logging): Harper maintains a log of events that take place throughout operation. -- [Audit Logging](logging/audit-logging): Harper uses a standard Harper table to track transactions. For each table a user creates, a corresponding table will be created to track transactions against that table. 
-- [Transaction Logging](logging/transaction-logging): Harper stores a verbose history of all transactions logged for specified database tables, including original data records. diff --git a/versioned_docs/version-4.7/administration/logging/standard-logging.md b/versioned_docs/version-4.7/administration/logging/standard-logging.md deleted file mode 100644 index 044c2260..00000000 --- a/versioned_docs/version-4.7/administration/logging/standard-logging.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Standard Logging ---- - -# Standard Logging - -Harper maintains a log of events that take place throughout operation. Log messages can be used for diagnostics purposes as well as monitoring. - -All logs (except for the install log) are stored in the main log file in the hdb directory `/log/hdb.log`. The install log is located in the Harper application directory most likely located in your npm directory `npm/harperdb/logs`. - -Each log message has several key components for consistent reporting of events. A log message has a format of: - -``` - [] [] ...[]: -``` - -For example, a typical log entry looks like: - -``` -2023-03-09T14:25:05.269Z [notify] [main/0]: HarperDB successfully started. -``` - -The components of a log entry are: - -- `timestamp` - This is the date/time stamp when the event occurred -- `level` - This is an associated log level that gives a rough guide to the importance and urgency of the message. The available log levels in order of least urgent (and more verbose) are: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. -- `thread/ID` - This reports the name of the thread and the thread ID that the event was reported on. Note that NATS logs are recorded by their process name and there is no thread id for them since they are a separate process. 
Key threads are: - - `main` - This is the thread that is responsible for managing all other threads and routes incoming requests to the other threads - - `http` - These are the worker threads that handle the primary workload of incoming HTTP requests to the operations API and custom functions. - - `Clustering` - These are threads and processes that handle replication. - - `job` - These are job threads that have been started to handle operations that are executed in a separate job thread. -- `tags` - Logging from a custom function will include a "custom-function" tag in the log entry. Most logs will not have any additional tags. -- `message` - This is the main message that was reported. - -We try to keep logging to a minimum by default, to do this the default log level is `error`. If you require more information from the logs, lowering the log level will provide that. - -The log level can be changed by modifying `logging.level` in the config file `harperdb-config.yaml`. - -## Clustering Logging - -Harper clustering utilizes two [NATS](https://nats.io/) servers, named Hub and Leaf. The Hub server is responsible for establishing the mesh network that connects instances of Harper and the Leaf server is responsible for managing the message stores (streams) that replicate and store messages between instances. Due to the verbosity of these servers there is a separate log level configuration for them. To adjust their log verbosity, set `clustering.logLevel` in the config file `harperdb-config.yaml`. Valid log levels from least verbose are `error`, `warn`, `info`, `debug` and `trace`. - -## Log File vs Standard Streams - -Harper logs can optionally be streamed to standard streams. Logging to standard streams (stdout/stderr) is primarily used for container logging drivers. For more traditional installations, we recommend logging to a file. Logging to both standard streams and to a file can be enabled simultaneously. 
To log to standard streams effectively, make sure to directly run `harperdb` and don't start it as a separate process (don't use `harperdb start`) and `logging.stdStreams` must be set to true. Note, logging to standard streams only will disable clustering catchup. - -## Logging Rotation - -Log rotation allows for managing log files, such as compressing rotated log files, archiving old log files, determining when to rotate, and the like. This will allow for organized storage and efficient use of disk space. For more information see "logging" in our [config docs](../../deployments/configuration). - -## Read Logs via the API - -To access specific logs you may query the Harper API. Logs can be queried using the `read_log` operation. `read_log` returns outputs from the log based on the provided search criteria. - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` diff --git a/versioned_docs/version-4.7/administration/logging/transaction-logging.md b/versioned_docs/version-4.7/administration/logging/transaction-logging.md deleted file mode 100644 index 99222e42..00000000 --- a/versioned_docs/version-4.7/administration/logging/transaction-logging.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: Transaction Logging ---- - -# Transaction Logging - -Harper offers two options for logging transactions executed against a table. The options are similar but utilize different storage layers. - -## Transaction log - -The first option is `read_transaction_log`. The transaction log is built upon clustering streams. Clustering streams are per-table message stores that enable data to be propagated across a cluster. Harper leverages streams for use with the transaction log. When clustering is enabled all transactions that occur against a table are pushed to its stream, and thus make up the transaction log. 
- -If you would like to use the transaction log, but have not set up clustering yet, please see ["How to Cluster"](../../reference/clustering/). - -## Transaction Log Operations - -### read_transaction_log - -The `read_transaction_log` operation returns a prescribed set of records, based on given parameters. The example below will give a maximum of 2 records within the timestamps provided. - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1598290235769, - "to": 1660249020865, - "limit": 2 -} -``` - -_See example response below._ - -### read_transaction_log Response - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - -_See example request above._ - -### delete_transaction_logs_before - -The `delete_transaction_logs_before` operation will delete transaction log data according to the given parameters. The example below will delete records older than the timestamp provided. - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -_Note: Streams are used for catchup if a node goes down. If you delete messages from a stream there is a chance catchup won't work._ - -Read on for `read_audit_log`, the second option, for logging transactions executed against a table. 
diff --git a/versioned_docs/version-4.7/deployments/_category_.json b/versioned_docs/version-4.7/deployments/_category_.json deleted file mode 100644 index 95644c6b..00000000 --- a/versioned_docs/version-4.7/deployments/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Deployments", - "position": 3, - "link": { - "type": "generated-index", - "title": "Deployments Documentation", - "description": "Installation and deployment guides for HarperDB", - "keywords": ["deployments"] - } -} diff --git a/versioned_docs/version-4.7/deployments/configuration.md b/versioned_docs/version-4.7/deployments/configuration.md deleted file mode 100644 index 345113fb..00000000 --- a/versioned_docs/version-4.7/deployments/configuration.md +++ /dev/null @@ -1,1556 +0,0 @@ ---- -title: Configuration File ---- - -# Configuration File - -Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory (by default this is a directory named `hdb` located in the home directory of the current user). - -Some configuration will be populated by default in the config file on install, regardless of whether it is used. - ---- - -## Using the Configuration File and Naming Conventions - -The configuration elements in `harperdb-config.yaml` use camelcase, such as `operationsApi`. - -To change a configuration value, edit the `harperdb-config.yaml` file and save any changes. **HarperDB must be restarted for changes to take effect.** - -Alternatively, all configuration values can also be modified using environment variables, command line arguments, or the operations API via the [`set_configuration` operation](../developers/operations-api/configuration#set-configuration). - -For nested configuration elements, use underscores to represent parent-child relationships. When accessed this way, elements are case-insensitive. 
- -For example, to disable logging rotation in the `logging` section: - -```yaml -logging: - rotation: - enabled: false -``` - -You could apply this change using: - -- Environment variable: `LOGGING_ROTATION_ENABLED=false` -- Command line variable: `--LOGGING_ROTATION_ENABLED false` -- Operations API (`set_configuration`): `logging_rotation_enabled: false` - -To change the `port` in the `http` section, use: - -- Environment variable: `HTTP_PORT=` -- Command line variable: `--HTTP_PORT ` -- Operations API (`set_configuration`): `http_port: ` - -To set the `operationsApi.network.port` to `9925`, use: - -- Environment variable: `OPERATIONSAPI_NETWORK_PORT=9925` -- Command line variable: `--OPERATIONSAPI_NETWORK_PORT 9925` -- Operations API (`set_configuration`): `operationsApi_network_port: 9925` - -_Note: Component configuration cannot be added or updated via CLI or ENV variables._ - -## Importing installation configuration - -To use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your custom configuration file. - -To install Harper overtop of an existing configuration file, set `HDB_CONFIG` to the root path of your install `/harperdb-config.yaml` - -## Environment Variable-Based Configuration - -Harper provides two special environment variables for managing configuration: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. These variables allow you to configure Harper instances through environment variables using JSON-formatted configuration objects. 
- -### Overview - -Both environment variables accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`: - -```bash -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' -export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' -``` - -The key difference between these variables is their precedence and behavior when configuration changes: - -| Feature | HARPER_DEFAULT_CONFIG | HARPER_SET_CONFIG | -| --------------- | ----------------------------- | ------------------------------- | -| **Purpose** | Provide sensible defaults | Force critical settings | -| **Precedence** | Lower (respects user edits) | Highest (always overrides) | -| **User edits** | Respected after detection | Always overridden | -| **Key removal** | Restores original values | Deletes values | -| **Use case** | Installation/runtime defaults | Security/compliance enforcement | - -### HARPER_DEFAULT_CONFIG - -`HARPER_DEFAULT_CONFIG` provides default configuration values while respecting user modifications. This is ideal for scenarios where you want to provide sensible defaults without preventing administrators from customizing their instances. 
- -#### Behavior - -**At installation time:** - -- Overrides template default values -- Respects values set by `HARPER_SET_CONFIG` -- Respects values from existing config files (when using `HDB_CONFIG`) - -**At runtime:** - -- Only updates values it originally set -- Automatically detects and respects manual user edits to the config file -- When a key is removed from the environment variable, the original value is restored - -#### Example: Setting Default Port - -```bash -# Set default port and logging level -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' - -# Install and start Harper -npm install -g harperdb -harperdb - -# The config file will have port 8080 and info logging - -# If an administrator manually edits the config to use port 9000, -# Harper will detect this change and respect it on subsequent restarts - -# If you remove http.port from the env var later: -export HARPER_DEFAULT_CONFIG='{"logging":{"level":"info"}}' -# The port will be restored to its original template default (9925) -``` - -### HARPER_SET_CONFIG - -`HARPER_SET_CONFIG` forces configuration values that must never be changed by users. This is designed for security policies, compliance requirements, or critical operational settings that need to be enforced across all instances. - -#### Behavior - -**At runtime:** - -- Always overrides all other configuration sources -- Takes precedence over user edits, file values, and `HARPER_DEFAULT_CONFIG` -- When a key is removed from the environment variable, it's deleted from the config (no restoration) - -#### Example: Enforce Security Settings - -```bash -# Force authentication and specific logging for compliance -export HARPER_SET_CONFIG='{"authentication":{"enabled":true},"logging":{"level":"error","stdStreams":true}}' - -# Install and start Harper -npm install -g harperdb -harperdb - -# Any attempt to change these values in harperdb-config.yaml will be -# overridden on the next restart. 
The SET_CONFIG values always win. - -# If you later remove authentication from SET_CONFIG: -export HARPER_SET_CONFIG='{"logging":{"level":"error","stdStreams":true}}' -# The authentication section will be removed from the config entirely -``` - -### Combining Both Variables - -You can use both environment variables together for maximum flexibility: - -```bash -# Provide sensible defaults for most settings -export HARPER_DEFAULT_CONFIG='{"http":{"port":8080,"cors":true},"logging":{"level":"info"}}' - -# But enforce critical security settings that cannot be changed -export HARPER_SET_CONFIG='{"authentication":{"enabled":true,"sessionTokenExpiration":3600}}' -``` - -In this scenario: - -- Administrators can customize the HTTP port, CORS settings, and logging level -- Authentication settings are always enforced and cannot be changed - -### Configuration Precedence - -The complete configuration precedence order (highest to lowest): - -1. **HARPER_SET_CONFIG** - Always wins -2. **User manual edits** - Detected through drift detection -3. **HARPER_DEFAULT_CONFIG** - Applied if no user edits detected -4. **File defaults** - Original template values - -### State Tracking - -Harper maintains a state file at `{rootPath}/backup/.harper-config-state.json` to track the source of each configuration value. 
This enables: - -- **Drift detection**: Identifying when users manually edit values set by `HARPER_DEFAULT_CONFIG` -- **Restoration**: Restoring original values when keys are removed from `HARPER_DEFAULT_CONFIG` -- **Conflict resolution**: Determining which source should take precedence - -### Important Notes - -- Both environment variables must contain valid JSON matching the structure of `harperdb-config.yaml` -- Configuration validation occurs after environment variables are applied -- Invalid values will be caught by Harper's configuration validator -- Changes to these environment variables require a Harper restart to take effect -- The state file is specific to each Harper instance (stored in the root path) - -### Format Reference - -The JSON structure mirrors the YAML configuration file. For example: - -**YAML format:** - -```yaml -http: - port: 8080 - cors: true -logging: - level: info - rotation: - enabled: true -``` - -**Environment variable format:** - -```json -{ "http": { "port": 8080, "cors": true }, "logging": { "level": "info", "rotation": { "enabled": true } } } -``` - ---- - -## Configuration Options - -### `http` - -`sessionAffinity` - _Type_: string; _Default_: null - -Harper is a multi-threaded server designed to scale to utilize many CPU cores with high concurrency. Session affinity can help improve the efficiency and fairness of thread utilization by routing multiple requests from the same client to the same thread. This provides a fairer method of request handling by keeping a single user contained to a single thread, can improve caching locality (multiple requests from a single user are more likely to access the same data), and can provide the ability to share information in-memory in user sessions. Enabling session affinity will cause subsequent requests from the same client to be routed to the same thread. - -To enable `sessionAffinity`, you need to specify how clients will be identified from the incoming requests. 
If you are using Harper to directly serve HTTP requests from users from different remote addresses, you can use a setting of `ip`. However, if you are using Harper behind a proxy server or application server, all the remote ip addresses will be the same and Harper will effectively only run on a single thread. Alternately, you can specify a header to use for identification. If you are using basic authentication, you could use the "Authorization" header to route requests to threads by the user's credentials. If you have another header that uniquely identifies users/clients, you can use that as the value of sessionAffinity. But be careful to ensure that the value does provide sufficient uniqueness and that requests are effectively distributed to all the threads and fully utilizing all your CPU cores. - -```yaml -http: - sessionAffinity: ip -``` - -`compressionThreshold` - _Type_: number; _Default_: 1200 (bytes) - -For HTTP clients that support (Brotli) compression encoding, responses that are larger than this threshold will be compressed (also note that for clients that accept compression, any streaming responses from queries are compressed as well, since the size is not known beforehand). - -```yaml -http: - compressionThreshold: 1200 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`corsAccessControlAllowHeaders` - _Type_: string; _Default_: 'Accept, Content-Type, Authorization' - -A string representation of a comma separated list of header keys for the [Access-Control-Allow-Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) header for OPTIONS requests. - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. 
- -`maxHeaderSize` - _Type_: integer; _Default_: 16394 - -The maximum allowed size of HTTP headers in bytes. - -`requestQueueLimit` - _Type_: integer; _Default_: 20000 - -The maximum estimated request queue time, in milliseconds. When the queue is above this limit, requests will be rejected with a 503. - -`keepAliveTimeout` - _Type_: integer; _Default_: 30,000 milliseconds (30 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9926 - -The port used to access the component server. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper component server uses for HTTPS connections. This requires a valid certificate and key. - -`http2` - _Type_: boolean; _Default_: false - -Enables HTTP/2 for the HTTP server. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -```yaml -http: - cors: true - corsAccessList: - - null - headersTimeout: 60000 - maxHeaderSize: 8192 - https: false - keepAliveTimeout: 30000 - port: 9926 - securePort: null - timeout: 120000 -``` - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. 
- -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for HTTP mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. - -**Note:** MQTT has its own `mqtt.network.mtls.user` setting (see [MQTT configuration](#mqtt)). - -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming HTTP connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -**Note:** MQTT has its own `mqtt.network.mtls.required` setting (see [MQTT configuration](#mqtt)). Replication uses node-based authentication via certificates or IP addresses, with credential-based fallback (see [Securing Replication Connections](../developers/replication/#securing-connections)). - -`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled) - -When mTLS is enabled, Harper can verify the revocation status of client certificates using CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication. - -**Certificate verification is disabled by default** and must be explicitly enabled for production environments where certificate revocation checking is required. 
- -Set to `true` to enable with defaults, `false` to disable, or configure with an object: - -**Global Settings:** - -- `failureMode` - _Type_: string; _Default_: 'fail-closed' - Global behavior when verification fails: - - `'fail-open'`: Allow connection on verification failure (logs warning) - - `'fail-closed'`: Reject connection on verification failure (recommended) - -**CRL Configuration:** (enabled by default when certificateVerification is enabled) - -- `crl.enabled` - _Type_: boolean; _Default_: true - Enable/disable CRL checking -- `crl.timeout` - _Type_: number; _Default_: 10000 - Maximum milliseconds to wait for CRL download -- `crl.cacheTtl` - _Type_: number; _Default_: 86400000 - Milliseconds to cache CRL (24 hours) -- `crl.gracePeriod` - _Type_: number; _Default_: 86400000 - Grace period after CRL nextUpdate (24 hours) -- `crl.failureMode` - _Type_: string; _Default_: 'fail-closed' - CRL-specific failure mode - -**OCSP Configuration:** (enabled by default as fallback when certificateVerification is enabled) - -- `ocsp.enabled` - _Type_: boolean; _Default_: true - Enable/disable OCSP checking -- `ocsp.timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response -- `ocsp.cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache successful OCSP responses (1 hour) -- `ocsp.errorCacheTtl` - _Type_: number; _Default_: 300000 - Milliseconds to cache OCSP errors (5 minutes) -- `ocsp.failureMode` - _Type_: string; _Default_: 'fail-closed' - OCSP-specific failure mode - -**Verification Strategy:** -Harper uses a CRL-first strategy with OCSP fallback. When a client certificate is presented: - -1. Check CRL if available (fast, cached locally) -2. Fall back to OCSP if CRL is not available or fails -3. 
Apply the configured failure mode if both methods fail - -Example configurations: - -```yaml -# Basic mTLS without certificate verification (certificate revocation not checked) -http: - mtls: true -``` - -```yaml -# mTLS with certificate verification enabled (recommended for production) -http: - mtls: - certificateVerification: true # Uses all defaults (CRL + OCSP, fail-closed) -``` - -```yaml -# Require mTLS for all connections + certificate verification -http: - mtls: - required: true # Reject connections without valid client certificate - certificateVerification: true -``` - -```yaml -# mTLS with custom verification settings for high-security environments -http: - mtls: - certificateVerification: - failureMode: fail-closed # Global setting - crl: - timeout: 15000 # 15 seconds for CRL download - cacheTtl: 43200000 # Cache CRLs for 12 hours - gracePeriod: 86400000 # 24 hour grace period - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache results for 2 hours -``` - -```yaml -# mTLS with CRL only (no OCSP fallback) -http: - mtls: - certificateVerification: - ocsp: false # Disable OCSP, CRL remains enabled -``` - ---- - -### `threads` - -The `threads` provides control over how many threads, how much heap memory they may use, and debugging of the threads: - -`count` - _Type_: number; _Default_: One less than the number of logical cores/processors - -The `threads.count` option specifies the number of threads that will be used to service the HTTP requests for the operations API and custom functions. Generally, this should be close to the number of CPU logical cores/processors to ensure the CPU is fully utilized (a little less because Harper does have other threads at work), assuming Harper is the main service on a server. - -```yaml -threads: - count: 11 -``` - -`debug` - _Type_: boolean | object; _Default_: false - -This enables debugging. 
If simply set to true, this will enable debugging on the main thread on port 9229 with the 127.0.0.1 host interface. This can also be an object for more debugging control. - -`debug.port` - The port to use for debugging the main thread `debug.startingPort` - This will set up a separate port for debugging each thread. This is necessary for debugging individual threads with devtools. `debug.host` - Specify the host interface to listen on `debug.waitForDebugger` - Wait for debugger before starting - -```yaml -threads: - debug: - port: 9249 -``` - -`maxHeapMemory` - _Type_: number; - -```yaml -threads: - maxHeapMemory: 300 -``` - -This specifies the heap memory limit for each thread, in megabytes. The default heap limit is a heuristic based on available memory and thread count. - -`heapSnapshotNearLimit` - _Type_: boolean; - -```yaml -threads: - heapSnapshotNearLimit: true -``` - -This specifies that a heap snapshot should be taken when the heap limit is near the limit. - ---- - -### `replication` - -The `replication` section configures [Harper replication](../developers/replication/), which is used to create Harper clusters and replicate data between the instances. - -```yaml -replication: - hostname: server-one - url: wss://server-one:9925 - databases: '*' - routes: - - wss://server-two:9925 - port: null - securePort: 9933, - enableRootCAs: true -``` - -`hostname` - _Type_: string; - -The hostname of the current Harper instance. - -`url` - _Type_: string; - -The URL of the current Harper instance. - -`databases` - _Type_: string/array; _Default_: "\*" (all databases) - -Configure which databases to replicate. This can be a string for all database or an array for specific databases. The list can be a simple array of database names: - -```yaml -replication: - databases: - - system - - data - - mydb -``` - -The database list can also specify databases that are purely "sharded" databases. 
For databases that are marked as sharded, replication will _only_ create database subscription connections to nodes in the same shard. Sharding can still function without this setting, since the residency location for sharding can be determined for each table or each record. However, using this setting will reduce the overhead of connections in situations where all data is uniformly sharded, creating a simpler and more efficient replication topology. To mark databases as sharded, you can specify a list of databases with a `name` and `sharded` flag: - -```yaml -replication: - databases: - - name: system - - name: data - sharded: true -``` - -`routes` - _Type_: array; - -An array of routes to connect to other nodes. Each element in the array can be either a string or an object with `hostname`, `port` and optionally `startTime` properties. - -`startTime` - _Type_: string; ISO formatted UTC date string. - -Replication will attempt to catch up on all remote data upon setup. To start replication from a specific date, set this property. - -`revokedCertificates` - _Type_: array; - -An array of serial numbers of revoked certificates. If a connection is attempted with a certificate that is in this list, the connection will be rejected. - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9925 # URL based route - - hostname: server-three # define a hostname and port - port: 9930 - startTime: 2024-02-06T15:30:00Z - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -`port` - _Type_: integer; - -The port to use for replication connections. - -`securePort` - _Type_: integer; _Default_: 9933 - -The port to use for secure replication connections. - -`enableRootCAs` - _Type_: boolean; _Default_: true - -When true, Harper will verify certificates against the Node.js bundled CA store. The bundled CA store is a snapshot of the Mozilla CA store that is fixed at release time. 
- -`mtls` - _Type_: object; - -Configures mTLS settings for replication connections. **mTLS is always required for replication** and cannot be disabled (for security reasons). You can configure certificate verification settings: - -```yaml -replication: - mtls: - certificateVerification: true # Enable certificate revocation checking -``` - -`certificateVerification` - _Type_: boolean | object; _Default_: false (disabled) - -When enabled, Harper will verify the revocation status of replication peer certificates using CRL and/or OCSP. This follows the same configuration structure as [HTTP certificate verification](#http) documented above. - -**Important:** mTLS itself is always enabled for replication connections and cannot be disabled. This setting only controls whether certificate revocation checking (CRL/OCSP) is performed. - -Example configurations: - -```yaml -# Replication with mTLS but no certificate verification (default) -replication: - hostname: server-one - routes: - - server-two - # mTLS is always enabled, certificate verification is optional -``` - -```yaml -# Replication with certificate verification enabled (recommended for production) -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: true # Uses CRL and OCSP with defaults -``` - -```yaml -# Replication with custom certificate verification settings -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: - crl: - timeout: 15000 - cacheTtl: 43200000 - ocsp: - timeout: 8000 -``` - -Certificate verification can also be configured via environment variables: - -```bash -REPLICATION_MTLS_CERTIFICATEVERIFICATION=true -REPLICATION_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed -REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL=true -REPLICATION_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 -REPLICATION_MTLS_CERTIFICATEVERIFICATION_OCSP=true -``` - -`blobTimeout` - _Type_: number; _Default_: 120000 - -Amount of time to wait 
for a blob to be transferred before timing out, measured in milliseconds. - -`failOver` - _Type_: boolean; _Default_: true - -When true, Harper will attempt to fail-over to subscribing to a different node if the current node is unreachable, to reach consistency. - -`shard` - _Type_: integer; - -This defines the shard id of this instance and is used in conjunction with the [Table Resource functions](../developers/replication/sharding#custom-sharding) `setResidency` & `setResidencyById` to programmatically route traffic to the proper shard. - ---- - -### `clustering` using NATS - -The `clustering` section configures the NATS clustering engine, this is used to replicate data between instances of Harper. - -_Note: There exist two ways to create clusters and replicate data in Harper. One option is to use native Harper replication over Websockets. The other option is to use_ [_NATS_](https://nats.io/about/) _to facilitate the cluster._ - -Clustering offers a lot of different configurations, however in a majority of cases the only options you will need to pay attention to are: - -- `clustering.enabled` Enable the clustering processes. -- `clustering.hubServer.cluster.network.port` The port other nodes will connect to. This port must be accessible from other cluster nodes. -- `clustering.hubServer.cluster.network.routes`The connections to other instances. -- `clustering.nodeName` The name of your node, must be unique within the cluster. -- `clustering.user` The name of the user credentials used for Inter-node authentication. - -`enabled` - _Type_: boolean; _Default_: false - -Enable clustering. - -_Note: If you enabled clustering but do not create and add a cluster user you will get a validation error. See `user` description below on how to add a cluster user._ - -```yaml -clustering: - enabled: true -``` - -`clustering.hubServer.cluster` - -Clustering’s `hubServer` facilitates the Harper mesh network and discovery service. 
- -```yaml -clustering: - hubServer: - cluster: - name: harperdb - network: - port: 9932 - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -`name` - _Type_: string, _Default_: harperdb - -The name of your cluster. This name needs to be consistent for all other nodes intended to be meshed in the same network. - -`port` - _Type_: integer, _Default_: 9932 - -The port the hub server uses to accept cluster connections - -`routes` - _Type_: array, _Default_: null - -An object array that represent the host and port this server will cluster to. Each object must have two properties `port` and `host`. Multiple entries can be added to create network resiliency in the event one server is unavailable. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.hubServer.leafNodes` - -```yaml -clustering: - hubServer: - leafNodes: - network: - port: 9931 -``` - -`port` - _Type_: integer; _Default_: 9931 - -The port the hub server uses to accept leaf server connections. - -`clustering.hubServer.network` - -```yaml -clustering: - hubServer: - network: - port: 9930 -``` - -`port` - _Type_: integer; _Default_: 9930 - -Use this port to connect a client to the hub server, for example using the NATs SDK to interact with the server. - -`clustering.leafServer` - -Manages streams, streams are ‘message stores’ that store table transactions. 
- -```yaml -clustering: - leafServer: - network: - port: 9940 - routes: - - host: 3.62.184.22 - port: 9931 - - host: node3.example.com - port: 9931 - streams: - maxAge: 3600 - maxBytes: 10000000 - maxMsgs: 500 - path: /user/hdb/clustering/leaf -``` - -`port` - _Type_: integer; _Default_: 9940 - -Use this port to connect a client to the leaf server, for example using the NATs SDK to interact with the server. - -`routes` - _Type_: array; _Default_: null - -An object array that represent the host and port the leaf node will directly connect with. Each object must have two properties `port` and `host`. Unlike the hub server, the leaf server will establish connections to all listed hosts. Routes can be added, updated and removed either by directly editing the `harperdb-config.yaml` file or by using the `cluster_set_routes` or `cluster_delete_routes` API endpoints. - -`host` - _Type_: string - -The host of the remote instance you are creating the connection with. - -`port` - _Type_: integer - -The port of the remote instance you are creating the connection with. This is likely going to be the `clustering.hubServer.cluster.network.port` on the remote instance. - -`clustering.leafServer.streams` - -`maxAge` - _Type_: integer; _Default_: null - -The maximum age of any messages in the stream, expressed in seconds. - -`maxBytes` - _Type_: integer; _Default_: null - -The maximum size of the stream in bytes. Oldest messages are removed if the stream exceeds this size. - -`maxMsgs` - _Type_: integer; _Default_: null - -How many messages may be in a stream. Oldest messages are removed if the stream exceeds this number. - -`path` - _Type_: string; _Default_: \/clustering/leaf - -The directory where all the streams are kept. - -```yaml -clustering: - leafServer: - streams: - maxConsumeMsgs: 100 - maxIngestThreads: 2 -``` - -`maxConsumeMsgs` - _Type_: integer; _Default_: 100 - -The maximum number of messages a consumer can process in one go. 
- -`maxIngestThreads` - _Type_: integer; _Default_: 2 - -The number of Harper threads that are delegated to ingesting messages. - ---- - -`logLevel` - _Type_: string; _Default_: error - -Control the verbosity of clustering logs. - -```yaml -clustering: - logLevel: error -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, and `error`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `warn`, the only entries logged will be `warn` and `error`. The default value is `error`. - -`nodeName` - _Type_: string; _Default_: null - -The name of this node in your Harper cluster topology. This must be a value unique from the rest of the cluster node names. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -```yaml -clustering: - nodeName: great_node -``` - -`tls` - -Transport Layer Security default values are automatically generated on install. - -```yaml -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`insecure` - _Type_: boolean; _Default_: true - -When true, will skip certificate verification. For use only with self-signed certs. - -`republishMessages` - _Type_: boolean; _Default_: false - -When true, all transactions that are received from other nodes are republished to this node's stream. 
When subscriptions are not fully connected between all nodes, this ensures that messages are routed to all nodes through intermediate nodes. This also ensures that all writes, whether local or remote, are written to the NATS transaction log. However, there is additional overhead with republishing, and setting this to false can provide better data replication performance. When false, you need to ensure all subscriptions are fully connected from every node to every other node, and be aware that the NATS transaction log will only consist of local writes. - -`verify` - _Type_: boolean; _Default_: true - -When true, the hub server will verify the client certificate using the CA certificate. - ---- - -`user` - _Type_: string; _Default_: null - -The username given to the `cluster_user`. All instances in a cluster must use the same clustering user credentials (matching username and password). - -Inter-node authentication takes place via a special Harper user role type called `cluster_user`. - -The user can be created either through the API using an `add_user` request with the role set to `cluster_user`, or on install using environment variables `CLUSTERING_USER=cluster_person` `CLUSTERING_PASSWORD=pass123!` or CLI variables `harperdb --CLUSTERING_USER cluster_person` `--CLUSTERING_PASSWORD` `pass123!` - -```yaml -clustering: - user: cluster_person -``` - ---- - -### `localStudio` - -The `localStudio` section configures the local Harper Studio, a GUI for Harper hosted on the server. A hosted version of the Harper Studio with licensing and provisioning options is available at [https://studio.harperdb.io](https://studio.harperdb.io). Note, all database traffic from either `localStudio` or Harper Studio is made directly from your browser to the instance. - -`enabled` - _Type_: boolean; _Default_: false - -Enables or disables the local studio. 
- -```yaml -localStudio: - enabled: false -``` - ---- - -### `logging` - -The `logging` section configures Harper logging across all Harper functionality. This includes standard text logging of application and database events as well as structured data logs of record changes. Application and database events are logged in text format to the `~/hdb/log/hdb.log` file (or location specified by `logging.root` or `logging.path`). Many of the logging configuration properties can be set and applied without a restart (are dynamically applied). - -In addition, structured logging of data changes is also available: - -`auditLog` - _Type_: boolean; _Default_: false - -Enables table transaction logging. - -```yaml -logging: - auditLog: false -``` - -To access the audit logs, use the API operation `read_audit_log`. It will provide a history of the data, including original records and changes made, in a specified table. - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -`file` - _Type_: boolean; _Default_: true - -Defines whether to log to a file. - -```yaml -logging: - file: true -``` - -`auditRetention` - _Type_: string|number; _Default_: 3d - -This specifies how long audit logs should be retained. - -`level` - _Type_: string; _Default_: warn - -Control the verbosity of text event logs. - -```yaml -logging: - level: warn -``` - -There exists a log level hierarchy in order as `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify`. When the level is set to `trace` logs will be created for all possible levels. Whereas if the level is set to `fatal`, the only entries logged will be `fatal` and `notify`. The default value is `warn`. - -`console` - _Type_: boolean; _Default_: true - -Controls whether console.log and other console.\* calls (as well as other JS components that write to `process.stdout` and `process.stderr`) are logged to the log file. 
By default, these are not logged to the log file, but this can be enabled: - -```yaml -logging: - console: true -``` - -`root` - _Type_: string; _Default_: \/log - -The directory path where the log files will be written. - -```yaml -logging: - root: ~/hdb/log -``` - -`path` - _Type_: string; _Default_: \/log/hdb.log - -The file path where the log file will be written. - -```yaml -logging: - path: ~/hdb/log/hdb.log -``` - -`rotation` - -Rotation provides the ability for a user to systematically rotate and archive the `hdb.log` file. To enable `interval` and/or `maxSize` must be set. - -_**Note:**_ `interval` and `maxSize` are approximate only. It is possible that the log file will exceed these values slightly before it is rotated. - -```yaml -logging: - rotation: - enabled: true - compress: false - interval: 1D - maxSize: 100K - path: /user/hdb/log -``` - -`enabled` - _Type_: boolean; _Default_: true - -Enables logging rotation. - -`compress` - _Type_: boolean; _Default_: false - -Enables compression via gzip when logs are rotated. - -`interval` - _Type_: string; _Default_: null - -The time that should elapse between rotations. Acceptable units are D(ays), H(ours) or M(inutes). - -`maxSize` - _Type_: string; _Default_: null - -The maximum size the log file can reach before it is rotated. Must use units M(egabyte), G(igabyte), or K(ilobyte). - -`path` - _Type_: string; _Default_: \/log - -Where to store the rotated log file. File naming convention is `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. - -`stdStreams` - _Type_: boolean; _Default_: false - -Log Harper logs to the standard output and error streams. - -```yaml -logging: - stdStreams: false -``` - -`auditAuthEvents` - -`logFailed` - _Type_: boolean; _Default_: false - -Log all failed authentication events. 
- -_Example:_ `[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -`logSuccessful` - _Type_: boolean; _Default_: false - -Log all successful authentication events. - -_Example:_ `[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"}` - -```yaml -logging: - auditAuthEvents: - logFailed: false - logSuccessful: false -``` - -#### Defining Separate Logging Configurations - -Harper's logger supports defining multiple logging configurations for different components in the system. Each logging configuration can be assigned its own `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. All logging defaults to the configuration of the "main" logger as configured above, but when logging is configured for different loggers, they will use their own configuration. Separate loggers can be defined: - -`logging.external` - -The `logging.external` section can be used to define logging for all external components that use the [`logger` API](../reference/globals). For example: - -```yaml -logging: - external: - level: warn - path: ~/hdb/log/apps.log -``` - -`http.logging` - -This section defines log configuration for HTTP logging. By default, HTTP requests are not logged, but defining this section will enable HTTP logging. Note that there can be substantive overhead to logging all HTTP requests. In addition to the standard logging configuration, the `http.logging` section also allows the following configuration properties to be set: - -- `timing` - This will log timing information -- `headers` - This will log the headers in each request (which can be very verbose) -- `id` - This will assign a unique id to each request and log it in the entry for each request. 
This is assigned as the `request.requestId` property and can be used by other logging to track a request. - Note that the `level` will determine which HTTP requests are logged: -- `info` (or more verbose) - All HTTP requests -- `warn` - HTTP requests with a status code of 400 or above -- `error` - HTTP requests with a status code of 500 - -For example: - -```yaml -http: - logging: - timing: true - level: info - path: ~/hdb/log/http.log - ... rest of http config -``` - -`authentication.logging` - -This section defines log configuration for authentication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`mqtt.logging` - -This section defines log configuration for MQTT. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`replication.logging` - -This section defines log configuration for replication. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`tls.logging` - -This section defines log configuration for TLS. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`storage.logging` - -This section defines log configuration for setting up and reading the database files. This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - -`analytics.logging` - -This section defines log configuration for analytics. 
This takes the standard logging configuration options of `path` (or `root`), `level`, `tag`, and flag to enable/disable logging to `stdStreams`. - ---- - -### `authentication` - -The authentication section defines the configuration for the default authentication mechanism in Harper. - -```yaml -authentication: - authorizeLocal: true - cacheTTL: 30000 - enableSessions: true - operationTokenTimeout: 1d - refreshTokenTimeout: 30d -``` - -`authorizeLocal` - _Type_: boolean; _Default_: true - -This will automatically authorize any requests from the loopback IP address as the superuser. This should be disabled for any Harper servers that may be accessed by untrusted users from the same instance. For example, this should be disabled if you are using a local proxy, or for general server hardening. - -`cacheTTL` - _Type_: number; _Default_: 30000 - -This defines the length of time (in milliseconds) that an authentication (a particular Authorization header or token) can be cached. - -`enableSessions` - _Type_: boolean; _Default_: true - -This will enable cookie-based sessions to maintain an authenticated session. This is generally the preferred mechanism for maintaining authentication in web browsers as it allows cookies to hold an authentication token securely without giving JavaScript code access to token/credentials that may open up XSS vulnerabilities. - -`operationTokenTimeout` - _Type_: string; _Default_: 1d - -Defines the length of time an operation token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -`refreshTokenTimeout` - _Type_: string; _Default_: 30d - -Defines the length of time a refresh token will be valid until it expires. Example values: [https://github.com/vercel/ms](https://github.com/vercel/ms). - -### `operationsApi` - -The `operationsApi` section configures the Harper Operations API.\ -All the `operationsApi` configuration is optional. 
Any configuration that is not provided under this section will default to the `http` configuration section. - -`network` - -```yaml -operationsApi: - network: - cors: true - corsAccessList: - - null - domainSocket: /user/hdb/operations-server - headersTimeout: 60000 - keepAliveTimeout: 5000 - port: 9925 - securePort: null - timeout: 120000 -``` - -`cors` - _Type_: boolean; _Default_: true - -Enable Cross Origin Resource Sharing, which allows requests across a domain. - -`corsAccessList` - _Type_: array; _Default_: null - -An array of allowable domains with CORS - -`domainSocket` - _Type_: string; _Default_: \/hdb/operations-server - -The path to the Unix domain socket used to provide the Operations API through the CLI - -`headersTimeout` - _Type_: integer; _Default_: 60,000 milliseconds (1 minute) - -Limit the amount of time the parser will wait to receive the complete HTTP headers with. - -`keepAliveTimeout` - _Type_: integer; _Default_: 5,000 milliseconds (5 seconds) - -Sets the number of milliseconds of inactivity the server needs to wait for additional incoming data after it has finished processing the last response. - -`port` - _Type_: integer; _Default_: 9925 - -The port the Harper operations API interface will listen on. - -`securePort` - _Type_: integer; _Default_: null - -The port the Harper operations API uses for HTTPS connections. This requires a valid certificate and key. - -`timeout` - _Type_: integer; _Default_: Defaults to 120,000 milliseconds (2 minutes) - -The length of time in milliseconds after which a request will timeout. - -`tls` - -This configures the Transport Layer Security for HTTPS support. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. 
- -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - ---- - -### `componentsRoot` - -`componentsRoot` - _Type_: string; _Default_: \/components - -The path to the folder containing the local component files. - -```yaml -componentsRoot: ~/hdb/components -``` - ---- - -### `rootPath` - -`rootPath` - _Type_: string; _Default_: home directory of the current user - -The Harper database and applications/API/interface are decoupled from each other. The `rootPath` directory specifies where the Harper application persists data, config, logs, and Custom Functions. - -```yaml -rootPath: /Users/jonsnow/hdb -``` - ---- - -### `storage` - -`writeAsync` - _Type_: boolean; _Default_: false - -The `writeAsync` option turns off disk flushing/syncing, allowing for faster write operation throughput. However, this does not provide storage integrity guarantees, and if a server crashes, it is possible that there may be data loss requiring restore from another backup/another node. - -```yaml -storage: - writeAsync: false -``` - -`caching` - _Type_: boolean; _Default_: true - -The `caching` option enables in-memory caching of records, providing faster access to frequently accessed objects. This can incur some extra overhead for situations where reads are extremely random and don't benefit from caching. - -```yaml -storage: - caching: true -``` - -`compression` - _Type_: boolean; _Default_: true - -The `compression` option enables compression of records in the database. This can be helpful for very large records in reducing storage requirements and potentially allowing more data to be cached. This uses the very fast LZ4 compression algorithm, but this still incurs extra costs for compressing and decompressing. 
- -```yaml -storage: - compression: false -``` - -`compression.dictionary` _Type_: number; _Default_: null - -Path to a compression dictionary file - -`compression.threshold` _Type_: number; _Default_: Either `4036` or if `storage.pageSize` provided `storage.pageSize - 60` - -Only entries that are larger than this value (in bytes) will be compressed. - -```yaml -storage: - compression: - dictionary: /users/harperdb/dict.txt - threshold: 1000 -``` - -`compactOnStart` - _Type_: boolean; _Default_: false - -When `true` all non-system databases will be compacted when starting Harper, read more [here](../administration/compact). - -`compactOnStartKeepBackup` - _Type_: boolean; _Default_: false - -Keep the backups made by compactOnStart. - -```yaml -storage: - compactOnStart: true - compactOnStartKeepBackup: false -``` - -`maxTransactionQueueTime` - _Type_: time; _Default_: 45s - -The `maxTransactionQueueTime` specifies how long the write queue can get before write requests are rejected (with a 503). - -```yaml -storage: - maxTransactionQueueTime: 2m -``` - -`noReadAhead` - _Type_: boolean; _Default_: false - -The `noReadAhead` option advises the operating system to not read ahead when reading from the database. This provides better memory utilization for databases with small records (less than one page), but can degrade performance in situations where large records are used or frequent range queries are used. - -```yaml -storage: - noReadAhead: true -``` - -`prefetchWrites` - _Type_: boolean; _Default_: true - -The `prefetchWrites` option loads data prior to write transactions. This should be enabled for databases that are larger than memory (although it can be faster to disable this for smaller databases). - -```yaml -storage: - prefetchWrites: true -``` - -`path` - _Type_: string; _Default_: `/database` - -The `path` configuration sets where all database files should reside. 
- -```yaml -storage: - path: /users/harperdb/storage -``` - -_**Note:**_ This configuration applies to all database files, which includes system tables that are used internally by Harper. For this reason if you wish to use a non default `path` value you must move any existing schemas into your `path` location. Existing schemas is likely to include the system schema which can be found at `/schema/system`. - -`blobPaths` - _Type_: string; _Default_: `/blobs` - -The `blobPaths` configuration sets where all the blob files should reside. This can be an array of paths, and if there are multiple, the blobs will be distributed across the paths. - -```yaml -storage: - blobPaths: - - /users/harperdb/big-storage -``` - -`pageSize` - _Type_: number; _Default_: Defaults to the default page size of the OS - -Defines the page size of the database. - -```yaml -storage: - pageSize: 4096 -``` - -`reclamation` - -The reclamation section provides configuration for the reclamation process, which is responsible for reclaiming space when free space is low. For example: - -```yaml -storage: - reclamation: - threshold: 0.4 # Start storage reclamation efforts when free space has reached 40% of the volume space (default) - interval: 1h # Reclamation will run every hour (default) - evictionFactor: 100000 # A factor used to determine how much aggressively to evict cached entries (default) -``` - ---- - -### `tls` - -The section defines the certificates, keys, and settings for Transport Layer Security (TLS) for HTTPS and TLS socket support. This is used for both the HTTP and MQTT protocols. The `tls` section can be a single object with the settings below, or it can be an array of objects, where each object is a separate TLS configuration. By using an array, the TLS configuration can be used to define multiple certificates for different domains/hosts (negotiated through SNI). 
- -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`certificate` - _Type_: string; _Default_: \/keys/certificate.pem - -Path to the certificate file. - -`certificateAuthority` - _Type_: string; _Default_: \/keys/ca.pem - -Path to the certificate authority file. - -`privateKey` - _Type_: string; _Default_: \/keys/privateKey.pem - -Path to the private key file. - -`ciphers` - _Type_: string; - -Allows specific ciphers to be set. - -If you want to define multiple certificates that are applied based on the domain/host requested via SNI, you can define an array of TLS configurations. Each configuration can have the same properties as the root TLS configuration, but can (optionally) also have an additional `host` property to specify the domain/host that the certificate should be used for: - -```yaml -tls: - - certificate: ~/hdb/keys/certificate1.pem - certificateAuthority: ~/hdb/keys/ca1.pem - privateKey: ~/hdb/keys/privateKey1.pem - host: example.com # the host is optional, and if not provided, this certificate's common name will be used as the host name. - - certificate: ~/hdb/keys/certificate2.pem - certificateAuthority: ~/hdb/keys/ca2.pem - privateKey: ~/hdb/keys/privateKey2.pem -``` - -Note that a `tls` section can also be defined in the `operationsApi` section, which will override the root `tls` section for the operations API. - ---- - -### `mqtt` - -The MQTT protocol can be configured in this section. - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 - mtls: false - webSocket: true - requireAuthentication: true -``` - -`port` - _Type_: number; _Default_: 1883 - -This is the port to use for listening for insecure MQTT connections. - -`securePort` - _Type_: number; _Default_: 8883 - -This is the port to use for listening for secure MQTT connections. This will use the `tls` configuration for certificates. 
- -`webSocket` - _Type_: boolean; _Default_: true - -This enables access to MQTT through WebSockets. This will handle WebSocket connections on the http port (defaults to 9926), that have specified a (sub) protocol of `mqtt`. - -`requireAuthentication` - _Type_: boolean; _Default_: true - -This indicates if authentication should be required for establishing an MQTT connection (whether through MQTT connection credentials or mTLS). Disabling this allows unauthenticated connections, which are then subject to authorization for publishing and subscribing (and by default tables/resources do not authorize such access, but that can be enabled at the resource level). - -`mtls` - _Type_: boolean | object; _Default_: false - -This can be configured to enable mTLS based authentication for incoming connections. If enabled with default options (by setting to `true`), the client certificate will be checked against the certificate authority specified in the `tls` section. And if the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. - -You can also define specific mTLS options by specifying an object for mtls with the following (optional) properties which may be included: - -`user` - _Type_: string; _Default_: Common Name - -This configures a specific username to authenticate as for mTLS connections. If a `user` is defined, any authorized mTLS connection (that authorizes against the certificate authority) will be authenticated as this user. This can also be set to `null`, which indicates that no authentication is performed based on the mTLS authorization. When combined with `required: true`, this can be used to enforce that users must have authorized mTLS _and_ provide credential-based authentication. 
- -`required` - _Type_: boolean; _Default_: false - -This can be enabled to require client certificates (mTLS) for all incoming MQTT connections. If enabled, any connection that doesn't provide an authorized certificate will be rejected/closed. By default, this is disabled, and authentication can take place with mTLS _or_ standard credential authentication. - -`certificateAuthority` - _Type_: string; _Default_: Path from `tls.certificateAuthority` - -This can define a specific path to use for the certificate authority. By default, certificate authorization checks against the CA specified at `tls.certificateAuthority`, but if you need a specific/distinct CA for MQTT, you can set this. - -`certificateVerification` - _Type_: boolean | object; _Default_: true - -When mTLS is enabled, Harper verifies the revocation status of client certificates using OCSP (Online Certificate Status Protocol). This ensures that revoked certificates cannot be used for authentication. - -Set to `false` to disable certificate verification, or configure with an object: - -- `timeout` - _Type_: number; _Default_: 5000 - Maximum milliseconds to wait for OCSP response -- `cacheTtl` - _Type_: number; _Default_: 3600000 - Milliseconds to cache verification results (default: 1 hour) -- `failureMode` - _Type_: string; _Default_: 'fail-open' - Behavior when OCSP verification fails: - - `'fail-open'`: Allow connection on verification failure (logs warning) - - `'fail-closed'`: Reject connection on verification failure - -For example, you could specify that mTLS is required and will authenticate as "user-name": - -```yaml -mqtt: - network: - mtls: - user: user-name - required: true -``` - ---- - -### `databases` - -The `databases` section is an optional configuration that can be used to define where database files should reside down to the table level. This configuration should be set before the database and table have been created. 
The configuration will not create the directories in the path, that must be done by the user. - -To define where a database and all its tables should reside use the name of your database and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - path: /path/to/database -``` - -To define where specific tables within a database should reside use the name of your database, the `tables` parameter, the name of your table and the `path` parameter. - -```yaml -databases: - nameOfDatabase: - tables: - nameOfTable: - path: /path/to/table -``` - -This same pattern can be used to define where the audit log database files should reside. To do this use the `auditPath` parameter. - -```yaml -databases: - nameOfDatabase: - auditPath: /path/to/database -``` - -**Setting the database section through the command line, environment variables or API** - -When using command line variables,environment variables or the API to configure the databases section a slightly different convention from the regular one should be used. To add one or more configurations use a JSON object array. - -Using command line variables: - -```bash ---DATABASES [{\"nameOfSchema\":{\"tables\":{\"nameOfTable\":{\"path\":\"\/path\/to\/table\"}}}}] -``` - -Using environment variables: - -```bash -DATABASES=[{"nameOfSchema":{"tables":{"nameOfTable":{"path":"/path/to/table"}}}}] -``` - -Using the API: - -```json -{ - "operation": "set_configuration", - "databases": [ - { - "nameOfDatabase": { - "tables": { - "nameOfTable": { - "path": "/path/to/table" - } - } - } - } - ] -} -``` - -### `analytics` - -`analytics_aggregatePeriod` - _Type_: number; _Default_: 60 (seconds) - -This defines how often recorded metrics in the `system.hdb_raw_analytics` table are aggregated into the `system.hdb_analytics` table. The analytics operations in the operations API exclusively use the aggregated analytics. 
- -```yaml -analytics: - aggregatePeriod: 60 -``` - -`analytics_replicate` - _Type_: boolean; _Default_: false - -This defines whether or not the aggregated analytics data in `system.hdb_analytics` should be replicated to the rest of the cluster. - -```yaml -analytics: - replicate: true -``` - ---- - -### Components - -`` - _Type_: string - -The name of the component. This will be used to name the folder where the component is installed and must be unique. - -`package` - _Type_: string - -A reference to your [component](../reference/components/applications#adding-components-to-root) package. This could be a remote git repo, a local folder/file or an NPM package. Harper will add this package to a package.json file and call `npm install` on it, so any reference that works with that paradigm will work here. - -Read more about npm install [here](https://docs.npmjs.com/cli/v8/commands/npm-install) - -`port` - _Type_: number _Default_: whatever is set in `http.port` - -The port that your component should listen on. If no port is provided it will default to `http.port` - -```yaml -: - package: 'HarperDB-Add-Ons/package-name' - port: 4321 -``` diff --git a/versioned_docs/version-4.7/deployments/harper-cli.md b/versioned_docs/version-4.7/deployments/harper-cli.md deleted file mode 100644 index d447e892..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cli.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: Harper CLI ---- - -# Harper CLI - -## Harper CLI - -The Harper command line interface (CLI) is used to administer [self-installed Harper instances](install-harper/). - -### Installing Harper - -To install Harper with CLI prompts, run the following command: - -```bash -harperdb install -``` - -Alternatively, Harper installations can be automated with environment variables or command line arguments; [see a full list of configuration parameters here](configuration#using-the-configuration-file-and-naming-conventions). 
Note, when used in conjunction, command line arguments will override environment variables. - -**Environment Variables** - -```bash -#minimum required parameters for no additional CLI prompts -export TC_AGREEMENT=yes -export HDB_ADMIN_USERNAME=HDB_ADMIN -export HDB_ADMIN_PASSWORD=password -export ROOTPATH=/tmp/hdb/ -export OPERATIONSAPI_NETWORK_PORT=9925 -harperdb install -``` - -**Command Line Arguments** - -```bash -#minimum required parameters for no additional CLI prompts -harperdb install --TC_AGREEMENT yes --HDB_ADMIN_USERNAME HDB_ADMIN --HDB_ADMIN_PASSWORD password --ROOTPATH /tmp/hdb/ --OPERATIONSAPI_NETWORK_PORT 9925 -``` - ---- - -### Starting Harper - -To start Harper after it is installed, run the following command: - -```bash -harperdb start -``` - ---- - -### Stopping Harper - -To stop Harper once it is running, run the following command: - -```bash -harperdb stop -``` - ---- - -### Restarting Harper - -To restart Harper once it is running, run the following command: - -```bash -harperdb restart -``` - ---- - -### Getting the Harper Version - -To check the version of Harper that is installed run the following command: - -```bash -harperdb version -``` - ---- - -### Renew self-signed certificates - -To renew the Harper generated self-signed certificates, run: - -```bash -harperdb renew-certs -``` - ---- - -### Copy a database with compaction - -To copy a Harper database with compaction (to eliminate free-space and fragmentation), use - -```bash -harperdb copy-db -``` - -For example, to copy the default database: - -```bash -harperdb copy-db data /home/user/hdb/database/copy.mdb -``` - ---- - -### Get all available CLI commands - -To display all available Harper CLI commands along with a brief description run: - -```bash -harperdb help -``` - ---- - -### Get the status of Harper and clustering - -To display the status of the Harper process, the clustering hub and leaf processes, the clustering network and replication statuses, run: - -```bash -harperdb 
status -``` - ---- - -### Backups - -Harper uses a transactional commit process that ensures that data on disk is always transactionally consistent with storage. This means that Harper maintains database integrity in the event of a crash. It also means that you can use any standard volume snapshot tool to make a backup of a Harper database. Database files are stored in the hdb/database directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down) , and database integrity will be preserved. Note that simply copying an in-use database file (using `cp`, for example) is _not_ a snapshot, and this would progressively read data from the database at different points in time, which yields unreliable copy that likely will not be usable. Standard copying is only reliable for a database file that is not in use. - ---- - -## Operations API through the CLI - -Some of the API operations are available through the CLI, this includes most operations that do not require nested parameters. To call the operation use the following convention: ` =`. By default, the result will be formatted as YAML, if you would like the result in JSON pass: `json=true`. 
- -Some examples are: - -```bash -$ harperdb describe_table database=dev table=dog - -schema: dev -name: dog -hash_attribute: id -audit: true -schema_defined: false -attributes: - - attribute: id - is_primary_key: true - - attribute: name - indexed: true -clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b -record_count: 10 -last_updated_record: 1724483231970.9949 -``` - -`harperdb set_configuration logging_level=error` - -`harperdb deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template` - -`harperdb get_components` - -`harperdb search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true` - -`harperdb search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]'` - -`harperdb sql sql='select * from dev.dog where id="1"'` - -### Remote Operations - -The CLI can also be used to run operations on remote Harper instances. To do this, pass the `target` parameter with the HTTP address of the remote instance. You generally will also need to provide credentials and specify the `username` and `password` parameters, or you can set environment variables `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD`, for example: - -```bash -export CLI_TARGET_USERNAME=HDB_ADMIN -export CLI_TARGET_PASSWORD=password -harperdb describe_database database=dev target=https://server.com:9925 -``` - -The same set of operations API are available for remote operations as well. - -#### Remote Component Deployment - -When using remote operations, you can deploy a local component to the remote instance. If you omit the `package` parameter, you can deploy the current directory. 
This will package the current directory and send it to the target server (also `deploy` is allowed as an alias to `deploy_component`): - -```bash -harperdb deploy target=https://server.com:9925 -``` - -If you are interacting with a cluster, you may wish to include the `replicated=true` parameter to ensure that the deployment operation is replicated to all nodes in the cluster. You will also need to restart afterwards to apply the changes (here seen with the replicated parameter): - -```bash -harperdb restart target=https://server.com:9925 replicated=true -``` diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md b/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md deleted file mode 100644 index 372807e5..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cloud/alarms.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Alarms ---- - -# Alarms - -Harper Cloud instance alarms are triggered when certain conditions are met. Once alarms are triggered organization owners will immediately receive an email alert and the alert will be available on the [Instance Configuration](../../administration/harper-studio/instance-configuration) page. The below table describes each alert and their evaluation metrics. - -### Heading Definitions - -- **Alarm**: Title of the alarm. -- **Threshold**: Definition of the alarm threshold. -- **Intervals**: The number of occurrences before an alarm is triggered and the period that the metric is evaluated over. -- **Proposed Remedy**: Recommended solution to avoid the alert in the future. 
- -| Alarm | Threshold | Intervals | Proposed Remedy | -| ------- | ---------- | --------- | --------------------------------------------------------------------------------------------------------------------------- | -| Storage | > 90% Disk | 1 x 5min | [Increased storage volume](../../administration/harper-studio/instance-configuration#update-instance-storage) | -| CPU | > 90% Avg | 2 x 5min | [Increase instance size for additional CPUs](../../administration/harper-studio/instance-configuration#update-instance-ram) | -| Memory | > 90% RAM | 2 x 5min | [Increase instance size](../../administration/harper-studio/instance-configuration#update-instance-ram) | diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/index.md b/versioned_docs/version-4.7/deployments/harper-cloud/index.md deleted file mode 100644 index c0785d0d..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cloud/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Harper Cloud ---- - -# Harper Cloud - -[Harper Cloud](https://studio.harperdb.io/) is the easiest way to test drive Harper, it’s Harper-as-a-Service. Cloud handles deployment and management of your instances in just a few clicks. Harper Cloud is currently powered by AWS with additional cloud providers on our roadmap for the future. - -You can create a new Harper Cloud instance in the Harper Studio. diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md b/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md deleted file mode 100644 index 6ea4c7d2..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cloud/instance-size-hardware-specs.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Instance Size Hardware Specs ---- - -# Instance Size Hardware Specs - -While Harper Cloud bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | -------------------------------------- | -| t3.micro | 1 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.small | 2 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| t3.medium | 4 | 2 | Up to 5 | 2.5 GHz Intel Xeon Platinum 8000 | -| m5.large | 8 | 2 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.xlarge | 16 | 4 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.2xlarge | 32 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.4xlarge | 64 | 16 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.8xlarge | 128 | 32 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.12xlarge | 192 | 48 | 10 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.16xlarge | 256 | 64 | 20 | Up to 3.1 GHz Intel Xeon Platinum 8000 | -| m5.24xlarge | 384 | 96 | 25 | Up to 3.1 GHz Intel Xeon Platinum 8000 | - -\*Specifications are subject to change. For the most up to date information, please refer to AWS documentation: [https://aws.amazon.com/ec2/instance-types/](https://aws.amazon.com/ec2/instance-types/). diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md b/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md deleted file mode 100644 index 0b32df8e..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cloud/iops-impact.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: IOPS Impact on Performance ---- - -# IOPS Impact on Performance - -Harper, like any database, can place a tremendous load on its storage resources. Storage, not CPU or memory, will more often be the bottleneck of server, virtual machine, or a container running Harper. Understanding how storage works, and how much storage performance your workload requires, is key to ensuring that Harper performs as expected. 
- -## IOPS Overview - -The primary measure of storage performance is the number of input/output operations per second (IOPS) that a storage device can perform. Different storage devices can have dramatically different performance profiles. A hard drive (HDD) might only perform a hundred or so IOPS, while a solid state drive (SSD) might be able to perform tens or hundreds of thousands of IOPS. - -Cloud providers like AWS, which powers Harper Cloud, don’t typically attach individual disks to a virtual machine or container. Instead, they combine large numbers of storage drives to create very high performance storage servers. Chunks (volumes) of that storage are then carved out and presented to many different virtual machines and containers. Due to the shared nature of this type of storage, the cloud provider places configurable limits on the number of IOPS that a volume can perform. The same way that cloud providers charge more for larger capacity volumes, they also charge more for volumes with more IOPS. - -## Harper Cloud Storage - -Harper Cloud utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp3) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp3 volumes have a baseline performance level of 3,000 IOPS, as a result, all Harper Cloud storage options will offer 3,000 IOPS. We plan to offer scalable IOPS as an option in the future. - -You can read more about AWS EBS volume IOPS here: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html. - -## Estimating IOPS for Harper Instance - -The number of IOPS required for a particular workload is influenced by many factors. Testing your particular application is the best way to determine the number of IOPS required. A reliable method is to estimate about two IOPS for every index, including the primary key itself. 
So if a table has two indices besides primary key, estimate that an insert or update will require about six IOPS. Note that that can often be closer to one IOPS per index under load due to internal batching of writes, and sometimes even better when doing sequential inserts. Again it is best to test to verify this with application specific data and write patterns. - -For assistance in estimating IOPS requirements feel free to contact Harper Support or join our Community Slack Channel. - -## Example Use Case IOPS Requirements - -- **Sensor Data Collection** - - In the case of IoT sensors where data collection will be sustained, high IOPS are required. While there are not typically large queries going on in this case, there is a high volume of data being ingested. This implies that IOPS will be sustained at a high level. For example, if you are collecting 100 records per second you would expect to need roughly 3,000 IOPS just to handle the data inserts. - -- **Data Analytics/BI Server** - - Providing a server for analytics purposes typically requires a larger machine. Typically these cases involve large scale SQL joins and aggregations, which puts a large strain on reads. Harper utilizes an in-memory cache, which provides a significant performance boost on machines with large amounts of memory. However, if disparate datasets are constantly being queried and/or new data is frequently being loaded, you will find that the system still needs to have high IOPS to meet performance demand. - -- **Web Services** - - Typical web service implementations with discrete reads and writes often do not need high IOPS to perform as expected. This is often the case in more transactional systems without the requirement for high performance load. A good rule to follow is that any Harper operation that requires a data scan will be IOPS intensive, but if these are not frequent then the EBS boost will suffice. 
Queries utilizing equals operations in either SQL or NoSQL do not require a scan due to Harper’s native indexing. - -- **High Performance Database** - - Ultimately, if performance is your top priority, Harper should be run on bare metal hardware. Cloud providers offer these options at a higher cost, but they come with obvious performance improvements. diff --git a/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md b/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md deleted file mode 100644 index aae57f67..00000000 --- a/versioned_docs/version-4.7/deployments/harper-cloud/verizon-5g-wavelength-instances.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Verizon 5G Wavelength ---- - -# Verizon 5G Wavelength - -These instances are only accessible from the Verizon network. When accessing your Harper instance please ensure you are connected to the Verizon network, examples include Verizon 5G Internet, Verizon Hotspots, or Verizon mobile devices. - -Harper on Verizon 5G Wavelength brings Harper closer to the end user exclusively on the Verizon network resulting in as little as single-digit millisecond response time from Harper to the client. - -Instances are built via AWS Wavelength. You can read more about [AWS Wavelength here](https://aws.amazon.com/wavelength/). - -Harper 5G Wavelength Instance Specs While Harper 5G Wavelength bills by RAM, each instance has other specifications associated with the RAM selection. The following table describes each instance size in detail\*. 
- -| AWS EC2 Instance Size | RAM (GiB) | # vCPUs | Network (Gbps) | Processor | -| --------------------- | --------- | ------- | -------------- | ------------------------------------------- | -| t3.medium | 4 | 2 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| t3.xlarge | 16 | 4 | Up to 5 | Up to 3.1 GHz Intel Xeon Platinum Processor | -| r5.2xlarge | 64 | 8 | Up to 10 | Up to 3.1 GHz Intel Xeon Platinum Processor | - -\*Specifications are subject to change. For the most up to date information, please refer to [AWS documentation](https://aws.amazon.com/ec2/instance-types/). - -## Harper 5G Wavelength Storage - -Harper 5G Wavelength utilizes AWS Elastic Block Storage (EBS) General Purpose SSD (gp2) volumes. This is the most common storage type used in AWS, as it provides reasonable performance for most workloads, at a reasonable price. - -AWS EBS gp2 volumes have a baseline performance level, which determines the number of IOPS it can perform indefinitely. The larger the volume, the higher its baseline performance. Additionally, smaller gp2 volumes are able to burst to a higher number of IOPS for periods of time. - -Smaller gp2 volumes are perfect for trying out the functionality of Harper, and might also work well for applications that don’t perform many database transactions. For applications that perform a moderate or high number of transactions, we recommend that you use a larger Harper volume. Learn more about the [impact of IOPS on performance here](iops-impact). - -You can read more about [AWS EBS gp2 volume IOPS here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html#ebsvolumetypes_gp2). 
diff --git a/versioned_docs/version-4.7/deployments/install-harper/index.md b/versioned_docs/version-4.7/deployments/install-harper/index.md deleted file mode 100644 index a11c85d8..00000000 --- a/versioned_docs/version-4.7/deployments/install-harper/index.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Install Harper ---- - -# Install Harper - -## Install Harper - -This documentation contains information for installing Harper locally. Note that if you’d like to get up and running quickly, you can deploy it to [Harper Fabric](https://fabric.harper.fast) our distributed data application platform service. Harper is a cross-platform database; we recommend Linux for production use. Installation is usually very simple and just takes a few steps, but there are a few different options documented here. Harper can also run on Windows and Mac, for development purposes only. Note: For Windows, we strongly recommend the use of Windows Subsystem for Linux (WSL). - -Harper runs on Node.js, so if you do not have it installed, you need to do that first (if you have installed, you can skip to installing Harper, itself). Node.js can be downloaded and installed from [their site](https://nodejs.org/). For Linux and Mac, we recommend installing and managing Node versions with [NVM, which has instructions for installation](https://github.com/nvm-sh/nvm). Generally NVM can be installed with the following command: - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash -``` - -And then logout and login, and then install Node.js using nvm. We recommend using LTS, but support all currently maintained Node versions (which is currently version 14 and newer, and make sure to always uses latest minor/patch for the major version): - -```bash -nvm install --lts -``` - -#### Install and Start Harper - -Then you can install Harper with NPM and start it: - -```bash -npm install -g harperdb -harperdb -``` - -Harper will automatically start after installation. 
Harper's installation can be configured with numerous options via CLI arguments, for more information visit the [Harper Command Line Interface](./harper-cli) guide. - -If you are setting up a production server on Linux, [we have much more extensive documentation on how to configure volumes for database storage, set up a systemd script, and configure your operating system to use as a database server in our linux installation guide](install-harper/linux). - -## With Docker - -If you would like to run Harper in Docker, install [Docker Desktop](https://docs.docker.com/desktop/) on your Mac or Windows computer. Otherwise, install the [Docker Engine](https://docs.docker.com/engine/install/) on your Linux server. You can then pull the image: - -```bash -docker pull harperdb/harperdb -``` - -Start a container, mount a volume and pass environment variables: - -```bash -docker run -d \ - -v :/home/harperdb/hdb \ - -e HDB_ADMIN_USERNAME=HDB_ADMIN \ - -e HDB_ADMIN_PASSWORD=password \ - -e THREADS=4 \ - -e OPERATIONSAPI_NETWORK_PORT=null \ - -e OPERATIONSAPI_NETWORK_SECUREPORT=9925 \ - -e HTTP_SECUREPORT=9926 \ - -p 9925:9925 \ - -p 9926:9926 \ - -p 9933:9933 \ - harperdb/harperdb -``` - -Here, the `` should be replaced with an actual directory path on your system where you want to store the persistent data. This command also exposes both the Harper Operations API (port 9925) and an additional HTTP port (9926). - -✅ Quick check: - -```bash -curl http://localhost:9925/health -``` - -:::info -💡 Why choose Docker: Great for consistent team environments, CI/CD pipelines, or deploying Harper alongside other services. -::: - -Once Docker Desktop or Docker Engine is installed, visit our [Docker Hub page](https://hub.docker.com/r/harperdb/harperdb) for information and examples on how to run a Harper container. 
- -## Offline Install - -If you need to install Harper on a device that doesn't have an Internet connection, you can choose your version and download the npm package and install it directly (you’ll still need Node.js and NPM): - -[Download Install Package](https://products-harperdb-io.s3.us-east-2.amazonaws.com/index.html) - -Once you’ve downloaded the .tgz file, run the following command from the directory where you’ve placed it: - -```bash -npm install -g harperdb-X.X.X.tgz harperdb install -``` - -## Installation on Less Common Platforms - -Harper comes with binaries for standard AMD64/x64 or ARM64 CPU architectures on Linux, Windows (x64 only), and Mac (including Apple Silicon). However, if you are installing on a less common platform (Alpine, for example), you will need to ensure that you have build tools installed for the installation process to compile the binaries (this is handled automatically), including: - -- [Go](https://go.dev/dl/): version 1.19.1 -- GCC -- Make -- Python v3.7, v3.8, v3.9, or v3.10 diff --git a/versioned_docs/version-4.7/deployments/install-harper/linux.md b/versioned_docs/version-4.7/deployments/install-harper/linux.md deleted file mode 100644 index cc312bac..00000000 --- a/versioned_docs/version-4.7/deployments/install-harper/linux.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -title: On Linux ---- - -# On Linux - -If you wish to install locally or already have a configured server, see the basic [Installation Guide](./) - -The following is a recommended way to configure Linux and install Harper. These instructions should work reasonably well for any public cloud or on-premises Linux instance. - ---- - -These instructions assume that the following has already been completed: - -1. Linux is installed -1. Basic networking is configured -1. A non-root user account dedicated to Harper with sudo privileges exists -1. An additional volume for storing Harper files is attached to the Linux instance -1. 
Traffic to ports 9925 (Harper Operations API), 9926 (Harper Application Interface), and 9932 (Harper Clustering) is permitted

While you will need to access Harper through port 9925 for the administration through the operations API, and port 9932 for clustering, for a higher level of security, you may want to consider keeping both of these ports restricted to a VPN or VPC, and only have the application interface (9926 by default) exposed to the public Internet.
If you used LVM to create logical volume, replace /dev/nvme1n1 with /dev/hdb_vg/hdb_lv) - -```bash -sudo mkfs.ext4 -L hdb_data /dev/nvme1n1 -``` - -Mount the file system and set the correct permissions for the directory - -```bash -mkdir /home/ubuntu/hdb -sudo mount -t ext4 /dev/nvme1n1 /home/ubuntu/hdb -sudo chown -R ubuntu:ubuntu /home/ubuntu/hdb -sudo chmod 775 /home/ubuntu/hdb -``` - -Create a fstab entry to mount the filesystem on boot - -```bash -echo "LABEL=hdb_data /home/ubuntu/hdb ext4 defaults,noatime 0 1" | sudo tee -a /etc/fstab -``` - -### Configure Linux and Install Prerequisites - -If a swap file or partition does not already exist, create and enable a 2GB swap file - -```bash -sudo dd if=/dev/zero of=/swapfile bs=128M count=16 -sudo chmod 600 /swapfile -sudo mkswap /swapfile -sudo swapon /swapfile -echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab -``` - -Increase the open file limits for the ubuntu user - -```bash -echo "ubuntu soft nofile 500000" | sudo tee -a /etc/security/limits.conf -echo "ubuntu hard nofile 1000000" | sudo tee -a /etc/security/limits.conf -``` - -Install Node Version Manager (nvm) - -```bash -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | bash -``` - -Load nvm (or logout and then login) - -```bash -. ~/.nvm/nvm.sh -``` - -Install Node.js using nvm ([read more about specific Node version requirements](https://www.npmjs.com/package/harperdb#prerequisites)) - -```bash -nvm install -``` - -### Install and Start Harper - -Here is an example of installing Harper with minimal configuration. - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" -``` - -Here is an example of installing Harper with commonly used additional configuration. 
- -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --ROOTPATH "/home/ubuntu/hdb" \ - --OPERATIONSAPI_NETWORK_PORT "9925" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HTTP_SECUREPORT "9926" \ - --CLUSTERING_ENABLED "true" \ - --CLUSTERING_USER "cluster_user" \ - --CLUSTERING_PASSWORD "password" \ - --CLUSTERING_NODENAME "hdb1" -``` - -You can also use a custom configuration file to set values on install, use the CLI/ENV variable `HDB_CONFIG` and set it to the path of your [custom configuration file](../configuration): - -```bash -npm install -g harperdb -harperdb start \ - --TC_AGREEMENT "yes" \ - --HDB_ADMIN_USERNAME "HDB_ADMIN" \ - --HDB_ADMIN_PASSWORD "password" \ - --HDB_CONFIG "/path/to/your/custom/harperdb-config.yaml" -``` - -#### Start Harper on Boot - -Harper will automatically start after installation. If you wish Harper to start when the OS boots, you have two options: - -You can set up a crontab: - -```bash -(crontab -l 2>/dev/null; echo "@reboot PATH=\"/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH\" && harperdb start") | crontab - -``` - -Or you can create a systemd script at `/etc/systemd/system/harperdb.service` - -Pasting the following contents into the file: - -``` -[Unit] -Description=Harper - -[Service] -Type=simple -Restart=always -User=ubuntu -Group=ubuntu -WorkingDirectory=/home/ubuntu -ExecStart=/bin/bash -c 'PATH="/home/ubuntu/.nvm/versions/node/v18.15.0/bin:$PATH"; harperdb' - -[Install] -WantedBy=multi-user.target -``` - -And then running the following: - -``` -systemctl daemon-reload -systemctl enable harperdb -``` - -For more information visit the [Harper Command Line Interface guide](../harper-cli) and the [Harper Configuration File guide](../configuration). 
diff --git a/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md b/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md deleted file mode 100644 index da1c885f..00000000 --- a/versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Upgrade a Harper Instance ---- - -# Upgrade a Harper Instance - -This document describes best practices for upgrading self-hosted Harper instances. Harper can be upgraded using a combination of npm and built-in Harper upgrade scripts. Whenever upgrading your Harper installation it is recommended you make a backup of your data first. Note: This document applies to self-hosted Harper instances only. All [Harper Cloud instances](harper-cloud/) will be upgraded by the Harper Cloud team. - -## Upgrading - -Upgrading Harper is a two-step process. First the latest version of Harper must be downloaded from npm, then the Harper upgrade scripts will be utilized to ensure the newest features are available on the system. - -1. Install the latest version of Harper using `npm install -g harperdb`. - - Note `-g` should only be used if you installed Harper globally (which is recommended). - -1. Run `harperdb` to initiate the upgrade process. - - Harper will then prompt you for all appropriate inputs and then run the upgrade directives. - -## Node Version Manager (nvm) - -[Node Version Manager (nvm)](https://nvm.sh/) is an easy way to install, remove, and switch between different versions of Node.js as required by various applications. More information, including directions on installing nvm can be found here: [https://nvm.sh/](https://nvm.sh/). 
- -Harper supports Node.js versions 14.0.0 and higher, however, **please check our** [**NPM page**](https://www.npmjs.com/package/harperdb) **for our recommended Node.js version.** To install a different version of Node.js with nvm, run the command: - -```bash -nvm install -``` - -To switch to a version of Node run: - -```bash -nvm use -``` - -To see the current running version of Node run: - -```bash -node --version -``` - -With a handful of different versions of Node.js installed, run nvm with the `ls` argument to list out all installed versions: - -```bash -nvm ls -``` - -When upgrading Harper, we recommend also upgrading your Node version. Here we assume you're running on an older version of Node; the execution may look like this: - -Switch to the older version of Node that Harper is running on (if it is not the current version): - -```bash -nvm use 14.19.0 -``` - -Make sure Harper is not running: - -```bash -harperdb stop -``` - -Uninstall Harper. Note, this step is not required, but will clean up old artifacts of Harper. We recommend removing all other Harper installations to ensure the most recent version is always running. - -```bash -npm uninstall -g harperdb -``` - -Switch to the newer version of Node: - -```bash -nvm use -``` - -Install Harper globally - -```bash -npm install -g harperdb -``` - -Run the upgrade script - -```bash -harperdb -``` - -Start Harper - -```bash -harperdb start -``` - ---- - -## Upgrading Nats to Plexus 4.4 - -To upgrade from NATS clustering to Plexus replication, follow these manual steps. They are designed for a fully replicating cluster to ensure minimal disruption during the upgrade process. - -The core of this upgrade is the _bridge node_. This node will run both NATS and Plexus simultaneously, ensuring that transactions are relayed between the two systems during the transition. The bridge node is crucial in preventing any replication downtime, as it will handle transactions from NATS nodes to Plexus nodes and vice versa. 
- -### Enabling Plexus - -To enable Plexus on a node that is already running NATS, you will need to update [two values](configuration) in the `harperdb-config.yaml` file: - -```yaml -replication: - url: wss://my-cluster-node-1:9925 - hostname: node-1 -``` - -`replication.url` – This should be set to the URL of the current Harper instance. - -`replication.hostname` – Since we are upgrading from NATS, this value should match the `clustering.nodeName` of the current instance. - -### Upgrade Steps - -1. Set up the bridge node: - - Choose one node to be the bridge node. - - On this node, follow the "Enabling Plexus" steps from the previous section, but **do not disable NATS clustering on this instance.** - - Stop the instance and perform the upgrade. - - Start the instance. This node should now be running both Plexus and NATS. -1. Upgrade a node: - - Choose a node that needs upgrading and enable Plexus by following the "Enable Plexus" steps. - - Disable NATS by setting `clustering.enabled` to `false`. - - Stop the instance and upgrade it. - - Start the instance. - - Call [`add_node`](../developers/operations-api/clustering#add-node) on the upgraded instance. In this call, omit `subscriptions` so that a fully replicating cluster is built. The target node for this call should be the bridge node. _Note: depending on your setup, you may need to expand this `add_node` call to include_ [_authorization and/or tls information_](../developers/operations-api/clustering#add-node)_._ - -```json -{ - "operation": "add_node", - "hostname:": "node-1", - "url": "wss://my-cluster-node-1:9925" -} -``` - -1. Repeat Step 2 on all remaining nodes that need to be upgraded. -1. Disable NATS on the bridge node by setting `clustering.enabled` to `false` and restart the instance. - -Your cluster upgrade should now be complete, with no NATS processes running on any of the nodes. 
diff --git a/versioned_docs/version-4.7/developers/_category_.json b/versioned_docs/version-4.7/developers/_category_.json deleted file mode 100644 index fdc723e5..00000000 --- a/versioned_docs/version-4.7/developers/_category_.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "label": "Developers", - "position": 1, - "link": { - "type": "generated-index", - "title": "Developers Documentation", - "description": "Comprehensive guides and references for building applications with HarperDB", - "keywords": ["developers"] - } -} diff --git a/versioned_docs/version-4.7/developers/applications/caching.md b/versioned_docs/version-4.7/developers/applications/caching.md deleted file mode 100644 index 27e6f4e2..00000000 --- a/versioned_docs/version-4.7/developers/applications/caching.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: Caching ---- - -# Caching - -Harper has integrated support for caching data from external sources. With built-in caching capabilities and distributed high-performance low-latency responsiveness, Harper makes an ideal data caching server. Harper can store cached data in standard tables, as queryable structured data, so data can easily be consumed in one format (for example JSON or CSV) and provided to end users in different formats with different selected properties (for example MessagePack, with a subset of selected properties), or even with customized querying capabilities. Harper also manages and provides timestamps/tags for proper caching control, facilitating further downstreaming caching. With these combined capabilities, Harper is an extremely fast, interoperable, flexible, and customizable caching server. - -## Configuring Caching - -To set up caching, first you will need to define a table that you will use as your cache (to store the cached data). 
You can review the [introduction to building applications](./) for more information on setting up the application (and the [defining schemas documentation](defining-schemas)), but once you have defined an application folder with a schema, you can add a table for caching to your `schema.graphql`: - -```graphql -type MyCache @table(expiration: 3600) @export { - id: ID @primaryKey -} -``` - -You may also note that we can define a time-to-live (TTL) expiration on the table, indicating when table records/entries should expire and be evicted from this table. This is generally necessary for "passive" caches where there is no active notification of when entries expire. However, this is not needed if you provide a means of notifying when data is invalidated and changed. The units for expiration, and other duration-based properties, are in seconds. - -While you can provide a single expiration time, there are actually several expiration timings that are potentially relevant, and can be independently configured. These settings are available as directive properties on the table configuration (like `expiration` above): stale expiration: The point when a request for a record should trigger a request to origin (but might possibly return the current stale record depending on policy) must-revalidate expiration: The point when a request for a record must make a request to origin first and return the latest value from origin. eviction expiration: The point when a record is actually removed from the caching table. - -You can provide a single expiration and it defines the behavior for all three. You can also provide three settings for expiration, through table directives: - -- `expiration` - The amount of time until a record goes stale. -- `eviction` - The amount of time after expiration before a record can be evicted (defaults to zero). -- `scanInterval` - The interval for scanning for expired records (defaults to one quarter of the total of expiration and eviction). 
- -#### How `scanInterval` Determines the Eviction Cycle - -`scanInterval` determines fixed clock-aligned times when eviction runs, and these times are the same regardless of when the server started. Harper takes the `scanInterval` and divides the TTL (`expiration` + `eviction`) into evenly spaced “anchor times.” These anchors are calculated in the local timezone of the server. This allows Harper to “snap” the eviction schedule to predictable points on the clock, such as every 15 minutes or every 6 hours, based on the interval length. As a result: - -- The server’s startup time does not affect when eviction runs. -- Eviction timings are deterministic and timezone-aware. -- For any given configuration, the eviction schedule is the same across restarts and across servers in the same local timezone. - -#### Example: 1-Hour Expiration - -`expiration` = 1 hour with default `scanInterval` (15 minutes, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 00:15 -> 00:30 -> 00:45 -> 01:00 -> ... continuing every 15 minutes ... - -If the server starts at 12:05 it does not run eviction at 12:20 or “15 minutes after startup.” Instead, the next scheduled anchor is 12:15, then 12:30, 12:45, 13:00, etc. The schedule is clock-aligned, not startup-aligned. - -#### Example: 1-Day Expiration - -`expiration` = 1 day with default `scanInterval` (6 hours, one quarter of `expiration`). This creates the following fixed eviction schedule: - -> 00:00 -> 06:00 -> 12:00 -> 18:00 -> ... continuing every 6 hours ... - -If the server starts at 12:05 the next matching eviction time is 18:00 the same day, then 00:00, 06:00, 12:00, 18:00, etc. If the server starts at 19:30 the schedule does not shift. Instead, the next anchor time is 00:00, and the regular 6-hour cycle continues. - -## Define External Data Source - -Next, you need to define the source for your cache. 
External data sources could be HTTP APIs, other databases, microservices, or any other source of data. This can be defined as a resource class in your application's `resources.js` module. You can extend the `Resource` class (which is available as a global variable in the Harper environment) as your base class. The first method to implement is a `get()` method to define how to retrieve the source data. For example, if we were caching an external HTTP API, we might define it as such: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - return (await fetch(`https://some-api.com/${this.getId()}`)).json(); - } -} -``` - -Next, we define this external data resource as the "source" for the caching table we defined above: - -```javascript -const { MyCache } = tables; -MyCache.sourcedFrom(ThirdPartyAPI); -``` - -Now we have a fully configured and connected caching table. If you access data from `MyCache` (for example, through the REST API, like `/MyCache/some-id`), Harper will check to see if the requested entry is in the table and return it if it is available (and hasn't expired). If there is no entry, or it has expired (it is older than one hour in this case), it will go to the source, calling the `get()` method, which will then retrieve the requested entry. Once the entry is retrieved, it will be saved/cached in the caching table (for one hour based on our expiration time). - -```mermaid -flowchart TD - Client1(Client 1)-->Cache(Caching Table) - Client2(Client 2)-->Cache - Cache-->Resource(Data Source Connector) - Resource-->API(Remote Data Source API) -``` - -Harper handles waiting for an existing cache resolution to finish and uses its result. This prevents a "cache stampede" when entries expire, ensuring that multiple requests to a cache entry will all wait on a single request to the data source. - -Cache tables with an expiration are periodically pruned for expired entries. 
Because this is done periodically, there is usually some amount of time between when a record has expired and when the record is actually evicted (the cached data is removed). But when a record is checked for availability, the expiration time is used to determine if the record is fresh (and the cache entry can be used). - -### Eviction with Indexing - -Eviction is the removal of a locally cached copy of data, but it does not imply the deletion of the actual data from the canonical or origin data source. Because evicted records still exist (just not in the local cache), if a caching table uses expiration (and eviction), and has indexing on certain attributes, the data is not removed from the indexes. The indexes that reference the evicted record are preserved, along with the attribute data necessary to maintain these indexes. Therefore eviction means the removal of non-indexed data (in this case evictions are stored as "partial" records). Eviction only removes the data that can be safely removed from a cache without affecting the integrity or behavior of the indexes. If a search query is performed that matches this evicted record, the record will be requested on-demand to fulfill the search query. - -### Specifying a Timestamp - -In the example above, we simply retrieved data to fulfill a cache request. We may want to supply the timestamp of the record we are fulfilling as well. This can be set on the context for the request: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - let response = await fetch(`https://some-api.com/${this.getId()}`); - this.getContext().lastModified = response.headers.get('Last-Modified'); - return response.json(); - } -} -``` - -#### Specifying an Expiration - -In addition, we can also specify when a cached record "expires". When a cached record expires, this means that a request for that record will trigger a request to the data source again. 
This does not necessarily mean that the cached record has been evicted (removed), although expired records will be periodically evicted. If the cached record still exists, the data source can revalidate it and return it. For example: - -```javascript -class ThirdPartyAPI extends Resource { - async get() { - const context = this.getContext(); - let headers = new Headers(); - if (context.replacingVersion) // this is the existing cached record - headers.set('If-Modified-Since', new Date(context.replacingVersion).toUTCString()); - let response = await fetch(`https://some-api.com/${this.getId()}`, { headers }); - let cacheInfo = response.headers.get('Cache-Control'); - let maxAge = cacheInfo?.match(/max-age=(\d)/)?.[1]; - if (maxAge) // we can set a specific expiration time by setting context.expiresAt - context.expiresAt = Date.now() + maxAge * 1000; // convert from seconds to milliseconds and add to current time - // we can just revalidate and return the record if the origin has confirmed that it has the same version: - if (response.status === 304) return context.replacingRecord; - ... -``` - -## Active Caching and Invalidation - -The cache we have created above is a "passive" cache; it only pulls data from the data source as needed, and has no knowledge of if and when data from the data source has actually changed, so it must rely on timer-based expiration to periodically retrieve possibly updated data. This means that it is possible that the cache may have stale data for a while (if the underlying data has changed, but the cached data hasn't expired), and the cache may have to refresh more than necessary if the data source data hasn't changed. Consequently it can be significantly more effective to implement an "active" cache, in which the data source is monitored and notifies the cache when any data changes. This ensures that when data changes, the cache can immediately load the updated data, and unchanged data can remain cached much longer (or indefinitely). 
- -### Invalidate - -One way to provide more active caching is to specifically invalidate individual records. Invalidation is useful when you know the source data has changed, and the cache needs to re-retrieve data from the source the next time that record is accessed. This can be done by executing the `invalidate()` method on a resource. For example, you could extend a table (in your resources.js) and provide a custom POST handler that does invalidation: - -```javascript -const { MyTable } = tables; -export class MyTableEndpoint extends MyTable { - async post(data) { - if (data.invalidate) - // use this flag as a marker - this.invalidate(); - } -} -``` - -(Note that if you are now exporting this endpoint through resources.js, you don't necessarily need to directly export the table separately in your schema.graphql). - -### Subscriptions - -We can provide more control of an active cache with subscriptions. If there is a way to receive notifications from the external data source of data changes, we can implement this data source as an "active" data source for our cache by implementing a `subscribe` method. A `subscribe` method should return an asynchronous iterable that iterates and returns events indicating the updates. One straightforward way of creating an asynchronous iterable is by defining the `subscribe` method as an asynchronous generator. 
If we had an endpoint that we could poll for changes every second, we could implement this like: - -```javascript -class ThirdPartyAPI extends Resource { - async *subscribe() { - setInterval(() => { // every second retrieve more data - // get the next data change event from the source - let update = (await fetch(`https://some-api.com/latest-update`)).json(); - const event = { // define the change event (which will update the cache) - type: 'put', // this would indicate that the event includes the new data value - id: // the primary key of the record that updated - value: // the new value of the record that updated - timestamp: // the timestamp of when the data change occurred - }; - yield event; // this returns this event, notifying the cache of the change - }, 1000); - } - async get() { -... -``` - -Notification events should always include an `id` property to indicate the primary key of the updated record. The event should have a `value` property for `put` and `message` event types. The `timestamp` is optional and can be used to indicate the exact timestamp of the change. The following event `type`s are supported: - -- `put` - This indicates that the record has been updated and provides the new value of the record. -- `invalidate` - Alternately, you can notify with an event type of `invalidate` to indicate that the data has changed, but without the overhead of actually sending the data (the `value` property is not needed), so the data only needs to be sent if and when the data is requested through the cache. An `invalidate` will evict the entry and update the timestamp to indicate that there is new data that should be requested (if needed). -- `delete` - This indicates that the record has been deleted. -- `message` - This indicates a message is being passed through the record. The record value has not changed, but this is used for [publish/subscribe messaging](../real-time). 
-- `transaction` - This indicates that there are multiple writes that should be treated as a single atomic transaction. These writes should be included as an array of data notification events in the `writes` property. - -And the following properties can be defined on event objects: - -- `type`: The event type as described above. -- `id`: The primary key of the record that updated -- `value`: The new value of the record that updated (for put and message) -- `writes`: An array of event properties that are part of a transaction (used in conjunction with the transaction event type). -- `table`: The name of the table with the record that was updated. This can be used with events within a transaction to specify events across multiple tables. -- `timestamp`: The timestamp of when the data change occurred - -With an active external data source with a `subscribe` method, the data source will proactively notify the cache, ensuring a fresh and efficient active cache. Note that with an active data source, we still use the `sourcedFrom` method to register the source for a caching table, and the table will automatically detect and call the subscribe method on the data source. - -By default, Harper will only run the subscribe method on one thread. Harper is multi-threaded and normally runs many concurrent worker threads, but typically running a subscription on multiple threads can introduce overlap in notifications and race conditions and running on a subscription on a single thread is preferable. However, if you want to enable subscribe on multiple threads, you can define a `static subscribeOnThisThread` method to specify if the subscription should run on the current thread: - -```javascript -class ThirdPartyAPI extends Resource { - static subscribeOnThisThread(threadIndex) { - return threadIndex < 2; // run on two threads (the first two threads) - } - async *subscribe() { - .... 
-``` - -An alternative to using asynchronous generators is to use a subscription stream and send events to it. A default subscription stream (that doesn't generate its own events) is available from the Resource's default subscribe method: - -```javascript -class ThirdPartyAPI extends Resource { - subscribe() { - const subscription = super.subscribe(); - setupListeningToRemoteService().on('update', (event) => { - subscription.send(event); - }); - return subscription; - } -} -``` - -## Downstream Caching - -It is highly recommended that you utilize the [REST interface](../rest) for accessing caching tables, as it facilitates downstreaming caching for clients. Timestamps are recorded with all cached entries. Timestamps are then used for incoming [REST requests to specify the `ETag` in the response](../rest#cachingconditional-requests). Clients can cache data themselves and send requests using the `If-None-Match` header to conditionally get a 304 and preserve their cached data based on the timestamp/`ETag` of the entries that are cached in Harper. Caching tables also have [subscription capabilities](caching#subscribing-to-caching-tables), which means that downstream caches can be fully "layered" on top of Harper, both as passive or active caches. - -## Write-Through Caching - -The cache we have defined so far only has data flowing from the data source to the cache. However, you may wish to support write methods, so that writes to the cache table can flow through to underlying canonical data source, as well as populate the cache. This can be accomplished by implementing the standard write methods, like `put` and `delete`. 
If you were using an API with standard RESTful methods, you can pass writes through to the data source like this: - -```javascript -class ThirdPartyAPI extends Resource { - async put(data) { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'PUT', - body: JSON.stringify(data) - }); - } - async delete() { - await fetch(`https://some-api.com/${this.getId()}`, { - method: 'DELETE', - }); - } - ... -``` - -When doing an insert or update to the MyCache table, the data will be sent to the underlying data source through the `put` method and the new record value will be stored in the cache as well. - -### Loading from Source in Methods - -When you are using a caching table, it is important to remember that any resource methods besides `get()`, will not automatically load data from the source. If you have defined a `put()`, `post()`, or `delete()` method and you need the source data, you can ensure it is loaded by calling the `ensureLoaded()` method. For example, if you want to modify the existing record from the source, adding a property to it: - -```javascript -class MyCache extends tables.MyCache { - async post(data) { - // if the data is not cached locally, retrieves from source: - await this.ensuredLoaded(); - // now we can be sure that the data is loaded, and can access properties - this.quantity = this.quantity - data.purchases; - } -} -``` - -### Subscribing to Caching Tables - -You can subscribe to a caching table just like any other table. The one difference is that normal tables do not usually have `invalidate` events, but an active caching table may have `invalidate` events. Again, this event type gives listeners an opportunity to choose whether or not to actually retrieve the value that changed. - -### Passive-Active Updates - -With our passive update examples, we have provided a data source handler with a `get()` method that returns the specific requested record as the response. 
However, we can also actively update other records in our response handler (if our data source provides data that should be propagated to other related records). This can be done transactionally, to ensure that all updates occur atomically. The context that is provided to the data source holds the transaction information, so we can simply pass the context to any update/write methods that we call. For example, let's say we are loading a blog post, which also includes comment records: - -```javascript -const { Post, Comment } = tables; -class BlogSource extends Resource { - get() { - const post = await (await fetch(`https://my-blog-server/${this.getId()}`).json()); - for (let comment of post.comments) { - await Comment.put(comment, this); // save this comment as part of our current context and transaction - } - return post; - } -} -Post.sourcedFrom(BlogSource); -``` - -Here both the update to the post and the update to the comments will be atomically/transactionally committed together with the same timestamp. - -## Cache-Control header - -When interacting with cached data, you can also use the `Cache-Control` request header to specify certain caching behaviors. When performing a PUT (or POST) method, you can use the `max-age` directive to indicate how long the resource should be cached (until stale): - -```http -PUT /my-resource/id -Cache-Control: max-age=86400 -``` - -You can use the `only-if-cached` directive on GET requests to only return a resource if it is cached (otherwise will return 504). Note, that if the entry is not cached, this will still trigger a request for the source data from the data source. If you do not want source data retrieved, you can add the `no-store` directive. You can also use the `no-cache` directive if you do not want to use the cached resource. 
If you wanted to check if there is a cached resource without triggering a request to the data source: - -```http -GET /my-resource/id -Cache-Control: only-if-cached, no-store -``` - -You may also use the `stale-if-error` to indicate if it is acceptable to return a stale cached resource when the data source returns an error (network connection error, 500, 502, 503, or 504). The `must-revalidate` directive can indicate a stale cached resource can not be returned, even when the data source has an error (by default a stale cached resource is returned when there is a network connection error). - -## Caching Flow - -It may be helpful to understand the flow of a cache request. When a request is made to a caching table: - -- Harper will first create a resource instance to handle the process, and ensure that the data is loaded for the resource instance. To do this, it will first check if the record is in the table/cache. - - If the record is not in the cache, Harper will first check if there is a current request to get the record from the source. If there is, Harper will wait for the request to complete and return the record from the cache. - - If not, Harper will call the `get()` method on the source to retrieve the record. The record will then be stored in the cache. - - If the record is in the cache, Harper will check if the record is stale. If the record is not stale, Harper will immediately return the record from the cache. If the record is stale, Harper will call the `get()` method on the source to retrieve the record. - - The record will then be stored in the cache. This will write the record to the cache in a separate asynchronous/background write-behind transaction, so it does not block the current request, then return the data immediately once it has it. -- The `get()` method will be called on the resource instance to return the record to the client (or perform any querying on the record). If this is overriden, the method will be called at this time. 
- -### Caching Flow with Write-Through - -When a writes are performed on a caching table (in `put()` or `post()` method, for example), the flow is slightly different: - -- Harper will have first created a resource instance to handle the process, and this resource instance that will be the current `this` for a call to `put()` or `post()`. -- If a `put()` or `update()` is called, for example, this action will be record in the current transaction. -- Once the transaction is committed (which is done automatically as the request handler completes), the transaction write will be sent to the source to update the data. - - The local writes will wait for the source to confirm the writes have completed (note that this effectively allows you to perform a two-phase transactional write to the source, and the source can confirm the writes have completed before the transaction is committed locally). - - The transaction writes will then be written the local caching table. -- The transaction handler will wait for the local commit to be written, then the transaction will be resolved and a response will be sent to the client. diff --git a/versioned_docs/version-4.7/developers/applications/data-loader.md b/versioned_docs/version-4.7/developers/applications/data-loader.md deleted file mode 100644 index b4059207..00000000 --- a/versioned_docs/version-4.7/developers/applications/data-loader.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: Data Loader ---- - -# Data Loader - -The Data Loader is a built-in component that provides a reliable mechanism for loading data from JSON or YAML files into Harper tables as part of component deployment. This feature is particularly useful for ensuring specific records exist in your database when deploying components, such as seed data, configuration records, or initial application data. 
- -## Configuration - -To use the Data Loader, first specify your data files in the `config.yaml` in your component directory: - -```yaml -dataLoader: - files: 'data/*.json' -``` - -The Data Loader is an [Extension](../../reference/components#extensions) and supports the standard `files` configuration option. - -## Data File Format - -Data files can be structured as either JSON or YAML files containing the records you want to load. Each data file must specify records for a single table - if you need to load data into multiple tables, create separate data files for each table. - -### Basic Example - -Create a data file in your component's data directory (one table per file): - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com", - "role": "administrator" - }, - { - "id": 2, - "username": "user1", - "email": "user1@example.com", - "role": "standard" - } - ] -} -``` - -### Multiple Tables - -To load data into multiple tables, create separate data files for each table: - -**users.json:** - -```json -{ - "database": "myapp", - "table": "users", - "records": [ - { - "id": 1, - "username": "admin", - "email": "admin@example.com" - } - ] -} -``` - -**settings.yaml:** - -```yaml -database: myapp -table: settings -records: - - id: 1 - setting_name: app_name - setting_value: My Application - - id: 2 - setting_name: version - setting_value: '1.0.0' -``` - -## File Organization - -You can organize your data files in various ways: - -### Single File Pattern - -```yaml -dataLoader: - files: 'data/seed-data.json' -``` - -### Multiple Files Pattern - -```yaml -dataLoader: - files: - - 'data/users.json' - - 'data/settings.yaml' - - 'data/initial-products.json' -``` - -### Glob Pattern - -```yaml -dataLoader: - files: 'data/**/*.{json,yaml,yml}' -``` - -## Loading Behavior - -When Harper starts up with a component that includes the Data Loader: - -1. 
The Data Loader reads all specified data files (JSON or YAML) -1. For each file, it validates that a single table is specified -1. Records are inserted or updated based on content hash comparison: - - New records are inserted if they don't exist - - Existing records are updated only if the data file content has changed - - User modifications made via Operations API or other methods are preserved - those records won't be overwritten - - Users can add extra fields to data-loader records without blocking future updates to the original fields -1. The Data Loader uses SHA-256 content hashing stored in a system table (`hdb_dataloader_hash`) to track which records it has loaded and detect changes - -### Change Detection - -The Data Loader intelligently handles various scenarios: - -- **New records**: Inserted with their content hash stored -- **Unchanged records**: Skipped (no database writes) -- **Changed data file**: Records are updated using `patch` to preserve any extra fields users may have added -- **User-created records**: Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten -- **User-modified records**: Records modified after being loaded are preserved and not overwritten -- **User-added fields**: Extra fields added to data-loader records are preserved during updates - -This approach ensures data files can be safely reloaded across deployments and node scaling without losing user modifications. - -Note: While the Data Loader can create tables automatically by inferring the schema from the provided records, it's recommended to define your table schemas explicitly using the [graphqlSchema](../applications/defining-schemas) component for better control and type safety. - -## Best Practices - -1. 
**Define Schemas First**: While the Data Loader can infer schemas, it's strongly recommended to define your table schemas and relations explicitly using the [graphqlSchema](../applications/defining-schemas) component before loading data. This ensures proper data types, constraints, and relationships between tables. - -1. **One Table Per File**: Remember that each data file can only load records into a single table. Organize your files accordingly. - -1. **Idempotency**: Design your data files to be idempotent - they should be safe to load multiple times without creating duplicate or conflicting data. - -1. **Version Control**: Include your data files in version control to ensure consistency across deployments. - -1. **Environment-Specific Data**: Consider using different data files for different environments (development, staging, production). - -1. **Data Validation**: Ensure your data files are valid JSON or YAML and match your table schemas before deployment. - -1. **Sensitive Data**: Avoid including sensitive data like passwords or API keys directly in data files. Use environment variables or secure configuration management instead. 
- -## Example Component Structure - -``` -my-component/ -├── config.yaml -├── data/ -│ ├── users.json -│ ├── roles.json -│ └── settings.json -├── schemas.graphql -└── roles.yaml -``` - -With this structure, your `config.yaml` might look like: - -```yaml -# Load environment variables first -loadEnv: - files: '.env' - -# Define schemas -graphqlSchema: - files: 'schemas.graphql' - -# Define roles -roles: - files: 'roles.yaml' - -# Load initial data -dataLoader: - files: 'data/*.json' - -# Enable REST endpoints -rest: true -``` - -## Related Documentation - -- [Built-In Components](../../reference/components/built-in-extensions) -- [Extensions](../../reference/components/extensions) -- [Bulk Operations](../operations-api/bulk-operations) - For loading data via the Operations API diff --git a/versioned_docs/version-4.7/developers/applications/debugging.md b/versioned_docs/version-4.7/developers/applications/debugging.md deleted file mode 100644 index bd9d2622..00000000 --- a/versioned_docs/version-4.7/developers/applications/debugging.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Debugging Applications ---- - -# Debugging Applications - -Harper components and applications run inside the Harper process, which is a standard Node.js process that can be debugged with standard JavaScript development tools like Chrome's devtools, VSCode, and WebStorm. Debugging can be performed by launching the Harper entry script with your IDE, or you can start Harper in dev mode and connect your debugger to the running process (defaults to standard 9229 port): - -``` -harperdb dev -# or to run and debug a specific app -harperdb dev /path/to/app -``` - -Once you have connected a debugger, you may set breakpoints in your application and fully debug it. Note that when using the `dev` command from the CLI, this will run Harper in single-threaded mode. This would not be appropriate for production use, but makes it easier to debug applications. 
- -For local debugging and development, it is recommended that you use standard console log statements for logging. For production use, you may want to use Harper's logging facilities, so you aren't logging to the console. The logging functions are available on the global `logger` variable that is provided by Harper. This logger can be used to output messages directly to the Harper log using standardized logging level functions, described below. The log level can be set in the [Harper Configuration File](../../deployments/configuration). - -Harper Logger Functions - -- `trace(message)`: Write a 'trace' level log, if the configured level allows for it. -- `debug(message)`: Write a 'debug' level log, if the configured level allows for it. -- `info(message)`: Write a 'info' level log, if the configured level allows for it. -- `warn(message)`: Write a 'warn' level log, if the configured level allows for it. -- `error(message)`: Write a 'error' level log, if the configured level allows for it. -- `fatal(message)`: Write a 'fatal' level log, if the configured level allows for it. -- `notify(message)`: Write a 'notify' level log. - -For example, you can log a warning: - -```javascript -logger.warn('You have been warned'); -``` - -If you want to ensure a message is logged, you can use `notify` as these messages will appear in the log regardless of log level configured. - -## Viewing the Log - -The Harper Log can be found in your local `~/hdb/log/hdb.log` file (or in the log folder if you have specified an alternate hdb root), or in the Studio Status page. Additionally, you can use the [`read_log` operation](../operations-api/logs) to query the Harper log. 
diff --git a/versioned_docs/version-4.7/developers/applications/define-routes.md b/versioned_docs/version-4.7/developers/applications/define-routes.md deleted file mode 100644 index d16c787e..00000000 --- a/versioned_docs/version-4.7/developers/applications/define-routes.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: Define Fastify Routes ---- - -# Define Fastify Routes - -Harper’s applications provide an extension for loading [Fastify](https://www.fastify.io/) routes as a way to handle endpoints. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest) for better performance and standards compliance, Fastify's route can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. - -The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): - -```yaml -fastifyRoutes: # This loads files that define fastify routes using fastify's auto-loader - files: routes/*.js # specify the location of route definition modules - path: . # relative to the app-name, like https://server/app-name/route-name -``` - -By default, route URLs are configured to be: - -- \[**Instance URL**]:\[**HTTP Port**]/\[**Project Name**]/\[**Route URL**] - -However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. - -- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. - -In effect, this route is just a pass-through to Harper. The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. 
- -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/', - method: 'POST', - preValidation: hdbCore.preValidation, - handler: hdbCore.request, - }); -}; -``` - -## Custom Handlers - -For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. - -**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** - -```javascript -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - handler: async (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` - }; - - const result = await hdbCore.requestWithoutAuthentication(request); - return result.filter((dog) => dog.age > 4); - } - }); -} -``` - -## Custom preValidation Hooks - -The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that.
- -Below is an example of a route that uses a custom validation hook: - -```javascript -import customValidation from '../helpers/customValidation'; - -export default async (server, { hdbCore, logger }) => { - server.route({ - url: '/:id', - method: 'GET', - preValidation: (request) => customValidation(request, logger), - handler: (request) => { - request.body = { - operation: 'sql', - sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, - }; - - return hdbCore.requestWithoutAuthentication(request); - }, - }); -}; -``` - -Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](define-routes#helper-methods). - -## Helper Methods - -When declaring routes, you are given access to 2 helper methods: hdbCore and logger. - -**hdbCore** - -hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, bypassing the standard Operations API. - -- **preValidation** - - This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. - -- **request** - - This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. - -- **requestWithoutAuthentication** - - Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request.
For security purposes, you should always take the following precautions when using this method: - - Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. - -**logger** - -This helper allows you to write directly to the log file, hdb.log. It’s useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. - -- logger.trace(‘Starting the handler for /dogs’) -- logger.debug(‘This should only fire once’) -- logger.warn(‘This should never ever fire’) -- logger.error(‘This did not go well’) -- logger.fatal(‘This did not go very well at all’) diff --git a/versioned_docs/version-4.7/developers/applications/defining-roles.md b/versioned_docs/version-4.7/developers/applications/defining-roles.md deleted file mode 100644 index 365aa132..00000000 --- a/versioned_docs/version-4.7/developers/applications/defining-roles.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Defining Application Roles ---- - -# Defining Application Roles - -Applications are more than just tables and endpoints — they need access rules. Harper lets you define roles directly in your application so you can control who can do what, without leaving your codebase. - -Let’s walk through creating a role, assigning it, and seeing it in action. - -## Step 1: Declare a Role - -First, point Harper to a roles configuration file. Add this to your `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -Then create a simple `roles.yaml` in your application directory. 
For example, here’s a role that can only read and insert data into the `Dog` table: - -```yaml -dog_reader: - super_user: false - data: - Dog: - read: true - insert: true -``` - -When Harper starts up, it will create this role (or update it if it already exists). - -## Step 2: Create a User for the Role - -Next, create a non-super_user user and assign them this role. You can do this with the [Users and Roles API](../security/users-and-roles) (requires a super_user to run): - -```bash -curl -u admin:password -X POST http://localhost:9926 \ - -H "Content-Type: application/json" \ - -d '{ - "operation": "add_user", - "username": "alice", - "password": "password", - "role": "dog_reader" - }' -``` - -Now you have a user named `alice` with the `dog_reader` role. - -## Step 3: Make Requests as Different Users - -Authenticate requests as `alice` to see how her role works: - -```bash -# allowed (insert, role permits insert) -curl -u alice:password -X POST http://localhost:9926/Dog/ \ - -H "Content-Type: application/json" \ - -d '{"name": "Buddy", "breed": "Husky"}' - -# not allowed (delete, role does not permit delete) -curl -u alice:password -X DELETE http://localhost:9926/Dog/1 -``` - -The first request succeeds with a `200 OK`. The second fails with a `403 Forbidden`. - -Now compare with a super_user: - -```bash -# super_user can delete -curl -u admin:password -X DELETE http://localhost:9926/Dog/1 -``` - -This succeeds because the super_user role has full permissions. - -## Where to Go Next - -This page gave you the basics - declare a role, assign it, and see it work. - -For more advanced scenarios, including: - -- defining multiple databases per role, -- granting fine-grained attribute-level permissions, -- and the complete structure of `roles.yaml`, - -see the [Roles Reference](../../reference/roles).
diff --git a/versioned_docs/version-4.7/developers/applications/defining-schemas.md b/versioned_docs/version-4.7/developers/applications/defining-schemas.md deleted file mode 100644 index fba870e8..00000000 --- a/versioned_docs/version-4.7/developers/applications/defining-schemas.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: Defining Schemas ---- - -# Defining Schemas - -Schemas define tables and their attributes. Schemas can be declaratively defined in Harper using GraphQL schema definitions. Schema definitions can be used to ensure that tables exist (that are required for applications), and have the appropriate attributes. Schemas can define the primary key, data types for attributes, if they are required, and specify which attributes should be indexed. The [introduction to applications](./) provides a helpful introduction to how to use schemas as part of database application development. - -Schemas can be used to define the expected structure of data, but are also highly flexible and support heterogeneous data structures and by default allow data to include additional properties. The standard types for GraphQL schemas are specified in the [GraphQL schema documentation](https://graphql.org/learn/schema/). - -An example schema that defines a couple tables might look like: - -```graphql -# schema.graphql: -type Dog @table { - id: ID @primaryKey - name: String - breed: String - age: Int -} - -type Breed @table { - id: ID @primaryKey -} -``` - -In this example, you can see that we specified the expected data structure for records in the Dog and Breed table. For example, this will enforce that Dog records are required to have a `name` property with a string (or null, unless the type were specified to be non-nullable). This does not preclude records from having additional properties (see `@sealed` for preventing additional properties). For example, some Dog records could also optionally include a `favoriteTrick` property.
- -In this page, we will describe the specific directives that Harper uses for defining tables and attributes in a schema. - -### Type Directives - -#### `@table` - -The schema for tables are defined using GraphQL type definitions with a `@table` directive: - -```graphql -type TableName @table -``` - -By default the table name is inherited from the type name (in this case the table name would be "TableName"). The `@table` directive supports several optional arguments (all of these are optional and can be freely combined): - -- `@table(table: "table_name")` - This allows you to explicitly specify the table name. -- `@table(database: "database_name")` - This allows you to specify which database the table belongs to. This defaults to the "data" database. -- `@table(expiration: 3600)` - Sets an expiration time on entries in the table before they are automatically cleared (primarily useful for caching tables). This is specified in seconds. -- `@table(audit: true)` - This enables the audit log for the table so that a history of record changes are recorded. This defaults to [configuration file's setting for `auditLog`](../../deployments/configuration#logging). - -Database naming: the default "data" database is generally a good default choice for tables in applications that will not be reused in other applications (and don't need to worry about staying in a separate namespace). Application with many tables may wish to organize the tables into separate databases (but remember that transactions do not preserve atomicity across different databases, only across tables in the same database). For components that are designed for re-use, it is recommended that you use a database name that is specific to the component (e.g. "my-component-data") to avoid name collisions with other components. 
- -#### `@export` - -This indicates that the specified table should be exported as a resource that is accessible as an externally available endpoint, through REST, MQTT, or any of the external resource APIs. - -This directive also accepts a `name` parameter to specify the name that should be used for the exported resource (how it will appear in the URL path). For example: - -``` -type MyTable @table @export(name: "my-table") -``` - -This table would be available at the URL path `/my-table/`. Without the `name` parameter, the exported name defaults to the name of the table type ("MyTable" in this example). - -### Relationships: `@relationship` - -Defining relationships is the foundation of using "join" queries in Harper. A relationship defines how one table relates to another table using a foreign key. Using the `@relationship` directive will define a property as a computed property, which resolves to a record/instance from a target type, based on the referenced attribute, which can be in this table or the target table. The `@relationship` directive must be used in combination with an attribute with a type that references another table. - -#### `@relationship(from: attribute)` - -This defines a relationship where the foreign key is defined in this table, and relates to the primary key of the target table. If the foreign key is single-valued, this establishes a many-to-one relationship with the target table. The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a foreign key that references another table and then define the relationship.
Here we create a `brandId` attribute that will be our foreign key (it will hold an id that references the primary key of the Brand table), and we define a relationship to the `Brand` table through the `brand` attribute: - -```graphql -type Product @table @export { - id: ID @primaryKey - brandId: ID @indexed - brand: Brand @relationship(from: brandId) -} -type Brand @table @export { - id: ID @primaryKey -} -``` - -Once this is defined we can use the `brand` attribute as a [property in our product instances](../../reference/resources/) and allow for querying by `brand` and selecting brand attributes as returned properties in [query results](../rest). - -Again, the foreign key may be a multi-valued array (array of keys referencing the target table records). For example, if we had a list of features that references a Feature table: - -```graphql -type Product @table @export { - id: ID @primaryKey - featureIds: [ID] @indexed # array of ids - features: [Feature] @relationship(from: featureIds) # array of referenced feature records -} -type Feature @table { - id: ID @primaryKey - ... -} -``` - -#### `@relationship(to: attribute)` - -This defines a relationship where the foreign key is defined in the target table and relates to the primary key of this table. If the foreign key is single-valued, this establishes a one-to-many relationship with the target table. Note that the target table type must be an array element type (like `[Table]`). The foreign key may also be a multi-valued array, in which case this will be a many-to-many relationship. For example, we can define a reciprocal relationship, from the example above, adding a relationship from brand back to product.
Here we continue to use the `brandId` attribute from the `Product` schema, and we define a relationship to the `Product` table through the `products` attribute: - -```graphql -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: brandId) -} -``` - -Once this is defined we can use the `products` attribute as a property in our brand instances and allow for querying by `products` and selecting product attributes as returned properties in query results. - -Note that schemas can also reference themselves with relationships, allowing records to define relationships like parent-child relationships between records in the same table. Also note that for a many-to-many relationship, you must not combine the `to` and `from` property in the same relationship directive. - -### Computed Properties: `@computed` - -The `@computed` directive specifies that a field is computed based on other fields in the record. This is useful for creating derived fields that are not stored in the database, but are computed when specific record fields are queried/accessed. The `@computed` directive must be used in combination with a field that is a function that computes the value of the field. For example: - -```graphql -type Product @table { - id: ID @primaryKey - price: Float - taxRate: Float - totalPrice: Float @computed(from: "price + (price * taxRate)") -} -``` - -The `from` argument specifies the expression that computes the value of the field. The expression can reference other fields in the record. The expression is evaluated when the record is queried or indexed. - -The `computed` directive may also be defined in a JavaScript module, which is useful for more complex computations. You can specify a computed attribute, and then define the function with the `setComputedAttribute` method. For example: - -```graphql -type Product @table { -...
- totalPrice: Float @computed -} -``` - -```javascript -tables.Product.setComputedAttribute('totalPrice', (record) => { - return record.price + record.price * record.taxRate; -}); -``` - -Computed properties may also be indexed, which provides a powerful mechanism for creating indexes on derived fields with custom querying capabilities. This can provide a mechanism for composite indexes, custom full-text indexing, vector indexing, or other custom indexing strategies. A computed property can be indexed by adding the `@indexed` directive to the computed property. When using a JavaScript module for a computed property that is indexed, it is highly recommended that you specify a `version` argument to ensure that the computed attribute is re-evaluated when the function is updated. For example: - -```graphql -type Product @table { -... - totalPrice: Float @computed(version: 1) @indexed -} -``` - -If you were to update the `setComputedAttribute` function for the `totalPrice` attribute, to use a new formula, you must increment the `version` argument to ensure that the computed attribute is re-indexed (note that on a large database, re-indexing may be a lengthy operation). Failing to increment the `version` argument with a modified function can result in an inconsistent index. The computed function must be deterministic, and should not have side effects, as it may be re-evaluated multiple times during indexing. - -Note that computed properties will not be included by default in a query result, you must explicitly include them in query results using the `select` query function. 
- -Another example of using a computed custom index, is that we could index all the comma-separated words in a `tags` property by doing (similar techniques are used for full-text indexing): - -```graphql -type Product @table { - id: ID @primaryKey - tags: String # comma delimited set of tags - tagsSeparated: String[] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed # split and index the tags -} -``` - -For more in-depth information on computed properties, visit our blog [here](https://www.harpersystems.dev/development/tutorials/how-to-create-custom-indexes-with-computed-properties) - -### Field Directives - -The field directives can be used for information about each attribute in table type definition. - -#### `@primaryKey` - -The `@primaryKey` directive specifies that an attribute is the primary key for a table. These must be unique and when records are created, this will be auto-generated if no primary key is provided. When a primary key is auto-generated, it will be a UUID (as a string) if the primary key type is `String` or `ID`. If the primary key type is `Int`, `Long`, or `Any`, then the primary key will be an auto-incremented number. Using numeric primary keys is more efficient than using UUIDs. Note that if the type is `Int`, the primary key will be limited to 32-bit, which can be limiting and problematic for large tables. It is recommended that if you will be relying on auto-generated keys, that you use a primary key type of `Long` or `Any` (the latter will allow you to also use strings as primary keys). - -#### `@indexed` - -The `@indexed` directive specifies that an attribute should be indexed. When an attribute is indexed, Harper will create secondary index from the data in this field for fast/efficient querying using this field. This is necessary if you want to execute queries using this attribute (whether that is through RESTful query parameters, SQL, or NoSQL operations). 
- -A standard index will index the values in each field, so you can query directly by those values. If the field's value is an array, each of the values in the array will be indexed (you can query by any individual value). - -#### Vector Indexing - -The `@indexed` directive can also specify a `type`. To use vector indexing, you can specify the `type` as `HNSW` for Hierarchical Navigable Small World indexing. This will create a vector index for the attribute. For example: - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW") -} -``` - -HNSW indexing finds the nearest neighbors to a search vector. To use this, you can query with a `sort` parameter, for example: - -```javascript -let results = Product.search({ - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -This can be used in combination with other conditions as well, for example: - -```javascript -let results = Product.search({ - conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }], - sort: { attribute: 'textEmbeddings', target: searchVector }, - limit: 5, // get the five nearest neighbors -}); -``` - -HNSW supports several additional arguments to the `@indexed` directive to adjust the HNSW parameters: - -- `distance` - Define the distance function. This can be set to 'euclidean' or 'cosine' (uses negative of cosine similarity). The default is cosine. -- `efConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors. A higher value can yield better recall, and a lower value can have better performance. If `efSearchConstruction` is set, this is only applied to indexing. The default is 100. -- `M` - The preferred number of connections at each layer in the HNSW graph. A higher number uses more space but can be helpful when the intrinsic dimensionality of the data is higher. A lower number can be more efficient. The default is 16. 
-- `optimizeRouting` - This uses a heuristic to avoid graph connections that match existing indirect connections (connections through another node). This can yield more efficient graph traversals for the same M setting. This is a number between 0 and 1 and a higher value will more aggressively omit connections with alternate paths. Setting this to 0 will disable route optimizing and follow the traditional HNSW algorithm for creating connections. The default is 0.5. -- `mL` - The normalization factor for level generation, by default this is computed from `M`. -- `efSearchConstruction` - Maximum number of nodes to keep in the list for finding nearest neighbors for searching. The default is 50. - -For example: - -```graphql -type Product @table { - id: Long @primaryKey - textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100) -} -``` - -#### `@createdTime` - -The `@createdTime` directive indicates that this property should be assigned a timestamp of the creation time of the record (in epoch milliseconds). - -#### `@updatedTime` - -The `@updatedTime` directive indicates that this property should be assigned a timestamp of each updated time of the record (in epoch milliseconds). - -#### `@sealed` - -The `@sealed` directive specifies that no additional properties should be allowed on records besides those specified in the type itself. - -### Defined vs Dynamic Schemas - -If you do not define a schema for a table and create a table through the operations API (without specifying attributes) or studio, such a table will not have a defined schema and will follow the behavior of a ["dynamic-schema" table](../../reference/dynamic-schema). It is generally best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity.
- -### Field Types - -Harper supports the following field types in addition to user defined (object) types: - -- `String`: String/text -- `Int`: A 32-bit signed integer (from -2147483648 to 2147483647) -- `Long`: A 54-bit signed integer (from -9007199254740992 to 9007199254740992) -- `Float`: Any number (any number that can be represented as a [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format). Note that all numbers are stored in the most compact representation available) -- `BigInt`: Any integer (negative or positive) with less than 300 digits (Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately) -- `Boolean`: true or false -- `ID`: A string (but indicates it is not intended to be human readable) -- `Any`: Any primitive, object, or array is allowed -- `Date`: A Date object -- `Bytes`: Binary data as a Buffer or Uint8Array -- `Blob`: Binary data as a [Blob](../../reference/blob), designed for large blocks of data that can be streamed. It is recommend that you use this for binary data that will typically be larger than 20KB. - -#### Renaming Tables - -It is important to note that Harper does not currently support renaming tables. If you change the name of a table in your schema definition, this will result in the creation of a new, empty table. - -### OpenAPI Specification - -_The_ [_OpenAPI Specification_](https://spec.openapis.org/oas/v3.1.0) _defines a standard, programming language-agnostic interface description for HTTP APIs, which allows both humans and computers to discover and understand the capabilities of a service without requiring access to source code, additional documentation, or inspection of network traffic._ - -If a set of endpoints are configured through a Harper GraphQL schema, those endpoints can be described by using a default REST endpoint called `GET /openapi`. 
- -_Note: The `/openapi` endpoint should only be used as a starting guide, it may not cover all the elements of an endpoint._ diff --git a/versioned_docs/version-4.7/developers/applications/index.md b/versioned_docs/version-4.7/developers/applications/index.md deleted file mode 100644 index 5f498f0e..00000000 --- a/versioned_docs/version-4.7/developers/applications/index.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -title: Applications ---- - -# Applications - -Harper is more than a database, it's a distributed clustering platform allowing you to package your schema, endpoints and application logic and deploy them to an entire fleet of Harper instances optimized for on-the-edge scalable data delivery. - -In this guide, we are going to explore the evermore extensible architecture that Harper provides by building a Harper application, a fundamental building-block of the Harper ecosystem. - -When working through this guide, we recommend you use the [Harper Application Template](https://github.com/HarperDB/application-template) repo as a reference. - -Before we get started, let's clarify some terminology that is used throughout the documentation. - -**Components** are the high-level concept for modules that extend the Harper core platform adding additional functionality. The application you will build here is a component. In addition to applications, components also encompass extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. 
For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. - -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -Extensions can also depend on other extensions. For example, the [`@harperdb/apollo`](https://github.com/HarperDB/apollo) extension depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. Applications can then use the `@harperdb/apollo` extension to implement an Apollo GraphQL backend server. 
- -```mermaid -flowchart TD - subgraph Applications - direction TB - NextJSApp["Next.js App"] - ApolloApp["Apollo App"] - CustomResource["Custom Resource"] - end - - subgraph Extensions - direction TB - subgraph Custom - NextjsExt["@harperdb/nextjs"] - ApolloExt["@harperdb/apollo"] - end - subgraph Built-In - GraphqlSchema["graphqlSchema"] - JsResource["jsResource"] - Rest["rest"] - end - end - - subgraph Core - direction TB - Database["database"] - FileSystem["file-system"] - Networking["networking"] - end - - NextJSApp --> NextjsExt - ApolloApp --> ApolloExt - CustomResource --> JsResource & GraphqlSchema & Rest - - NextjsExt --> Networking - NextjsExt --> FileSystem - ApolloExt --> GraphqlSchema - ApolloExt --> Networking - - GraphqlSchema --> Database - JsResource --> Database - Rest --> Networking -``` - -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](../reference/components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -Beyond applications and extensions, components are further classified as built-in or custom. **Built-in** components are included with Harper by default and can be directly referenced by their name. The `graphqlSchema`, `rest`, and `jsResource` extensions used in the previous application example are all examples of built-in extensions. 
**Custom** components must use external references, generally npm or GitHub packages, and are often included as dependencies within the `package.json` of the component. - -> Harper maintains a number of custom components that are available on `npm` and `GitHub`, such as the [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) extension or the [`@harperdb/status-check`](https://github.com/HarperDB/status-check) application. - -Harper does not currently include any built-in applications, making "custom applications" a bit redundant. Generally, we just say "application". However, there is a multitude of both built-in and custom extensions, and so the documentation refers to them as such. A complete list of built-in extensions is available in the [Built-In Extensions](../reference/components/built-in-extensions) documentation page, and the list of custom extensions and applications is available below. - -This guide is going to walk you through building a basic Harper application using a set of built-in extensions. - -> The Reference -> Components section of the documentation contains a [complete reference for all aspects of components](../reference/components), applications, extensions, and more. - -## Custom Functionality with JavaScript - -[The getting started guide](/learn/) covers how to build an application entirely through schema configuration. However, if your application requires more custom functionality, you will probably want to employ your own JavaScript modules to implement more specific features and interactions. This gives you tremendous flexibility and control over how data is accessed and modified in Harper. Let's take a look at how we can use JavaScript to extend and define "resources" for custom functionality. Let's add a property to the dog records when they are returned, that includes their age in human years. 
In Harper, data is accessed through our [Resource API](../reference/resources/), a standard interface to access data sources, tables, and make them available to endpoints. Database tables are `Resource` classes, and so extending the function of a table is as simple as extending their class. - -To define custom (JavaScript) resources as endpoints, we need to create a `resources.js` module (this goes in the root of your application folder). And then endpoints can be defined with Resource classes that are `export`ed. This can be done in addition to, or in lieu of the `@export`ed types in the schema.graphql. If you are exporting and extending a table you defined in the schema make sure you remove the `@export` from the schema so that you don't export the original table or resource to the same endpoint/path you are exporting with a class. Resource classes have methods that correspond to standard HTTP/REST methods, like `get`, `post`, `patch`, and `put` to implement specific handling for any of these methods (for tables they all have default implementations). To do this, we get the `Dog` class from the defined tables, extend it, and export it: - -```javascript -// resources.js: -const { Dog } = tables; // get the Dog table from the Harper provided set of tables (in the default database) - -export class DogWithHumanAge extends Dog {
 - static loadAsInstance = false; - async get(target) { - const record = await super.get(target); - return { - ...record, // include all properties from the record - humanAge: 15 + record.age * 5, // silly calculation of human age equivalent - }; - } -} -``` - -Here we exported the `DogWithHumanAge` class (exported with the same name), which directly maps to the endpoint path. Therefore, now we have a `/DogWithHumanAge/` endpoint based on this class, just like the direct table interface that was exported as `/Dog/`, but the new endpoint will return objects with the computed `humanAge` property. 
Resource classes provide getters/setters for every defined attribute so that accessing instance properties like `age` will get the value from the underlying record. The instance holds information about the primary key of the record so updates and actions can be applied to the correct record. And changing or assigning new properties can be saved or included in the resource as it is returned and serialized. The `return super.get(query)` call at the end allows for any query parameters to be applied to the resource, such as selecting individual properties (with a [`select` query parameter](./rest#selectproperties)). - -Often we may want to incorporate data from other tables or data sources in your data models. Next, let's say that we want a `Breed` table that holds detailed information about each breed, and we want to add that information to the returned dog object. We might define the Breed table as (back in schema.graphql): - -```graphql -type Breed @table { - name: String @primaryKey - description: String @indexed - lifespan: Int - averageWeight: Float -} -``` - -We use the new table's (static) `get()` method to retrieve a breed by id. Harper will maintain the current context, ensuring that we are accessing the data atomically, in a consistent snapshot across tables. This provides: - -1. Automatic tracking of most recently updated timestamps across resources for caching purposes -2. Sharing of contextual metadata (like user who requested the data) -3. Transactional atomicity for any writes (not needed in this get operation, but important for other operations) - -The resource methods are automatically wrapped with a transaction and will automatically commit the changes when the method finishes. This allows us to fully utilize multiple resources in our current transaction. 
With our own snapshot of the database for the Dog and Breed table we can then access data like this: - -```javascript -// resources.js: -const { Dog, Breed } = tables; // get the Breed table too -export class DogWithBreed extends Dog { - static loadAsInstance = false; - async get(target) { - // get the Dog record - const record = await super.get(target); - // get the Breed record - let breedDescription = await Breed.get(record.breed); - return { - ...record, - breedDescription, - }; - } -} -``` - -The call to `Breed.get` will return an instance of the `Breed` resource class, which holds the record specified by the provided id/primary key. Like the `Dog` instance, we can access or change properties on the Breed instance. - -Here we have focused on customizing how we retrieve data, but we may also want to define custom actions for writing data. While the HTTP PUT method has a specific semantic definition (replace current record), a common method for custom actions is through the HTTP POST method. The POST method has much more open-ended semantics and is a good choice for custom actions. POST requests are handled by our Resource's post() method. Let's say that we want to define a POST handler that adds a new trick to the `tricks` array of a specific instance. We might do it like this, and specify an action to be able to differentiate actions: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const record = this.update(target); - record.tricks.push(data.trick); - } - } -} -``` - -And a POST request to /CustomDog/ would call this `post` method. The Resource class then automatically tracks changes you make to your resource instances and saves those changes when this transaction is committed (again these methods are automatically wrapped in a transaction and committed once the request handler is finished). 
So when you push data onto the `tricks` array, this will be recorded and persisted when this method finishes and before sending a response to the client. - -The `post` method automatically marks the current instance as being updated. However, you can also explicitly specify that you are changing a resource by calling the `update()` method. If you want to modify a resource instance that you retrieved through a `get()` call (like `Breed.get()` call above), you can call its `update()` method to ensure changes are saved (and will be committed in the current transaction). - -We can also define custom authorization capabilities. For example, we might want to specify that only the owner of a dog can make updates to a dog. We could add logic to our `post()` method or `put()` method to do this. For example, we might do this: - -```javascript -export class CustomDog extends Dog { - static loadAsInstance = false; - async post(target, data) { - if (data.action === 'add-trick') { - const context = this.getContext(); - // if we want to skip the default permission checks, we can turn off checkPermissions: - target.checkPermissions = false; - const record = this.update(target); - // and do our own/custom permission check: - if (record.owner !== context.user?.username) { - throw new Error('Can not update this record'); - } - record.tricks.push(data.trick); - } - } -} -``` - -Any methods that are not defined will fall back to Harper's default authorization procedure based on users' roles. If you are using/extending a table, this is based on Harper's [role based access](./security/users-and-roles). If you are extending the base `Resource` class, the default access requires super user permission. - -You can also use the `default` export to define the root path resource handler. For example: - -```javascript -// resources.js -export default class CustomDog extends Dog { - ... -``` - -This will allow requests to a URL like / to be directly resolved to this resource. 
- -## Define Custom Data Sources - -We can also directly implement the Resource class and use it to create new data sources from scratch that can be used as endpoints. Custom resources can also be used as caching sources. Let's say that we defined a `Breed` table that was a cache of information about breeds from another source. We could implement a caching table like: - -```javascript -const { Breed } = tables; // our Breed table -class BreedSource extends Resource { - // define a data source - async get(target) { - return (await fetch(`https://best-dog-site.com/${target}`)).json(); - } -} -// define that our breed table is a cache of data from the data source above, with a specified expiration -Breed.sourcedFrom(BreedSource, { expiration: 3600 }); -``` - -The [caching documentation](applications/caching) provides much more information on how to use Harper's powerful caching capabilities and set up data sources. - -Harper provides a powerful JavaScript API with significant capabilities that go well beyond a "getting started" guide. See our documentation for more information on using the [`globals`](../reference/globals) and the [Resource interface](../reference/resources). - -## Configuring Applications/Components - -For complete information of configuring applications, refer to the [Component Configuration](../reference/components) reference page. - -## Define Fastify Routes - -Exporting resource will generate full RESTful endpoints. But, you may prefer to define endpoints through a framework. Harper includes a resource plugin for defining routes with the Fastify web framework. Fastify is a full-featured framework with many plugins, that provides sophisticated route definition capabilities. - -By default, applications are configured to load any modules in the `routes` directory (matching `routes/*.js`) with Fastify's autoloader, which will allow these modules to export a function to define fastify routes. 
See the [defining routes documentation](applications/define-routes) for more information on how to create Fastify routes. - -However, Fastify is not as fast as Harper's RESTful endpoints (about 10%-20% slower/more-overhead), nor does it automate the generation of a full uniform interface with correct RESTful header interactions (for caching control), so generally the Harper's REST interface is recommended for optimum performance and ease of use. - -## Restarting Your Instance - -Generally, Harper will auto-detect when files change and auto-restart the appropriate threads. However, if there are changes that aren't detected, you may manually restart, with the `restart_service` operation: - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` diff --git a/versioned_docs/version-4.7/developers/applications/web-applications.md b/versioned_docs/version-4.7/developers/applications/web-applications.md deleted file mode 100644 index 02fd1893..00000000 --- a/versioned_docs/version-4.7/developers/applications/web-applications.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Web Applications on Harper ---- - -# Web Applications on Harper - -Harper is an efficient, capable, and robust platform for developing web applications, with numerous capabilities designed -specifically for optimized web application delivery. In addition, there are a number of tools and frameworks that can be used -with Harper to create web applications with standard best-practice design and development patterns. Running these frameworks -on Harper can unlock tremendous scalability and performance benefits by leveraging Harper's built-in multi-threading, -caching, and distributed design. - -Harper's unique ability to run JavaScript code directly on the server side, combined with its built-in database for data storage, querying, and caching -allows you to create full-featured web applications with a single platform. 
This eliminates the overhead of legacy solutions that -require separate application servers, databases, and caching layers, and their requisite communication overhead and latency, while -allowing the full stack to be deployed to distributed locations with full local response handling, providing an incredibly low latency web experience. - -## Web Application Frameworks - -With built-in caching mechanisms, and an easy-to-use JavaScript API for interacting with data, creating full-featured applications -using popular frameworks is a simple and straightforward process. - -Get started today with one of our examples: - -- [Next.js](https://github.com/HarperDB/nextjs-example) -- [React SSR](https://github.com/HarperDB/react-ssr-example) -- [Vue SSR](https://github.com/HarperDB/vue-ssr-example) -- [Svelte SSR](https://github.com/HarperDB/svelte-ssr-example) -- [Solid SSR](https://github.com/HarperDB/solid-ssr-example) - -## Cookie Support - -Harper includes support for authenticated sessions using cookies. This allows you to create secure, authenticated web applications -using best-practice security patterns, allowing users to log in and maintain a session without any credential storage on the client side -that can be compromised. A login endpoint can be defined by exporting a resource and calling the `login` method on the request object. For example, this could be a login endpoint in your resources.js file: - -```javascript -export class Login extends Resource { - async post(data) { - const { username, password } = data; - await request.login(username, password); - return { message: 'Logged in!' }; - } -} -``` - -This endpoint can be called from the client side using a standard fetch request, a cookie will be returned, and the session will be maintained by Harper. -This allows web applications to directly interact with Harper and database resources, without needing to go through extra layers of authentication handling. 
- -## Browser Caching Negotiation - -Browsers support caching negotiation with revalidation, which allows requests for locally cached data to be sent to servers with a tag or timestamp. Harper REST functionality can fully interact with these headers, and return `304 Not Modified` response based on prior `Etag` sent in headers. It is highly recommended that you utilize the [REST interface](../rest) for accessing tables, as it facilitates this downstream browser caching. Timestamps are recorded with all records and are then returned [as the `ETag` in the response](../rest#cachingconditional-requests). Utilizing this browser caching can greatly reduce the load on your server and improve the performance of your web application by being able to instantly use locally cached data after revalidation from the server. - -## Built-in Cross-Origin Resource Sharing (CORS) - -Harper includes built-in support for Cross-Origin Resource Sharing (CORS), which allows you to define which domains are allowed to access your Harper instance. This is a critical security feature for web applications, as it prevents unauthorized access to your data from other domains, while allowing cross-domain access from known hosts. You can define the allowed domains in your [Harper configuration file](../../deployments/configuration#http), and Harper will automatically handle the CORS headers for you. 
- -## More Resources - -Make sure to check out our developer videos too: - -- [Next.js on Harper | Step-by-Step Guide for Next Level Next.js Performance](https://youtu.be/GqLEwteFJYY) -- [Server-side Rendering (SSR) with Multi-Tier Cache Demo](https://youtu.be/L-tnBNhO9Fc) diff --git a/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md b/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md deleted file mode 100644 index c4254430..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md +++ /dev/null @@ -1,1775 +0,0 @@ ---- -title: Advanced JSON SQL Examples ---- - -# Advanced JSON SQL Examples - -## Create movies database - -Create a new database called `movies` using the `create_database` operation. - -_Note: Creating a database is optional, if one is not created Harper will default to using a database named `data`_ - -### Body - -```json -{ - "operation": "create_database", - "database": "movies" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'movies' successfully created" -} -``` - ---- - -## Create movie Table - -Creates a new table called "movie" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "movie", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.movie' successfully created." -} -``` - ---- - -## Create credits Table - -Creates a new table called "credits" inside the database "movies" using the ‘create_table’ operation. - -### Body - -```json -{ - "operation": "create_table", - "database": "movies", - "table": "credits", - "primary_key": "movie_id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'movies.credits' successfully created." 
-} -``` - ---- - -## Bulk Insert movie Via CSV - -Inserts data from a hosted CSV file into the "movie" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "movie", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/movie.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 1889eee4-23c1-4945-9bb7-c805fc20726c" -} -``` - ---- - -## Bulk Insert credits Via CSV - -Inserts data from a hosted CSV file into the "credits" table using the 'csv_url_load' operation. - -### Body - -```json -{ - "operation": "csv_url_load", - "database": "movies", - "table": "credits", - "csv_url": "https://search-json-sample-data.s3.us-east-2.amazonaws.com/credits.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3a14cd74-67f3-41e9-8ccd-45ffd0addc2c", - "job_id": "3a14cd74-67f3-41e9-8ccd-45ffd0addc2c" -} -``` - ---- - -## View raw data - -In the following example we will be running expressions on the keywords & production_companies attributes, so for context we are displaying what the raw data looks like. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, keywords, production_companies FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - { - "id": 305, - "name": "moon" - }, - { - "id": 697, - "name": "loss of loved one" - }, - { - "id": 839, - "name": "planet mars" - }, - { - "id": 14626, - "name": "astronaut" - }, - { - "id": 157265, - "name": "moon colony" - }, - { - "id": 162429, - "name": "solar system" - }, - { - "id": 240119, - "name": "father son relationship" - }, - { - "id": 244256, - "name": "near future" - }, - { - "id": 257878, - "name": "planet neptune" - }, - { - "id": 260089, - "name": "space walk" - } - ], - "production_companies": [ - { - "id": 490, - "name": "New Regency Productions", - "origin_country": "" - }, - { - "id": 79963, - "name": "Keep Your Head", - "origin_country": "" - }, - { - "id": 73492, - "name": "MadRiver Pictures", - "origin_country": "" - }, - { - "id": 81, - "name": "Plan B Entertainment", - "origin_country": "US" - }, - { - "id": 30666, - "name": "RT Features", - "origin_country": "BR" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - }, - { - "id": 22213, - "name": "TSG Entertainment", - "origin_country": "US" - } - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - { - "id": 3070, - "name": "mercenary" - }, - { - "id": 4110, - "name": "mumbai (bombay), india" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 9730, - "name": "crime boss" - }, - { - "id": 11107, - "name": "rescue mission" - }, - { - "id": 18712, - "name": "based on graphic novel" - }, - { - "id": 265216, - "name": "dhaka (dacca), bangladesh" - } - ], - "production_companies": [ - { - "id": 106544, - "name": "AGBO", - "origin_country": "US" - }, - { - "id": 109172, - "name": "Thematic Entertainment", - "origin_country": "US" - }, - { - "id": 92029, - "name": "TGIM Films", - 
"origin_country": "US" - } - ] - }, - { - "title": "To the Beat! Back 2 School", - "rank": 3, - "keywords": [ - { - "id": 10873, - "name": "school" - } - ], - "production_companies": [] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": [ - { - "id": 2651, - "name": "nanotechnology" - }, - { - "id": 9715, - "name": "superhero" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 164218, - "name": "psychotronic" - }, - { - "id": 255024, - "name": "shared universe" - }, - { - "id": 258575, - "name": "valiant comics" - } - ], - "production_companies": [ - { - "id": 34, - "name": "Sony Pictures", - "origin_country": "US" - }, - { - "id": 10246, - "name": "Cross Creek Pictures", - "origin_country": "US" - }, - { - "id": 6573, - "name": "Mimran Schur Pictures", - "origin_country": "US" - }, - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 103673, - "name": "The Hideaway Entertainment", - "origin_country": "US" - }, - { - "id": 124335, - "name": "Valiant Entertainment", - "origin_country": "US" - }, - { - "id": 5, - "name": "Columbia Pictures", - "origin_country": "US" - }, - { - "id": 1225, - "name": "One Race", - "origin_country": "US" - }, - { - "id": 30148, - "name": "Bona Film Group", - "origin_country": "CN" - } - ] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - { - "id": 818, - "name": "based on novel or book" - }, - { - "id": 4542, - "name": "gold rush" - }, - { - "id": 15162, - "name": "dog" - }, - { - "id": 155821, - "name": "sled dogs" - }, - { - "id": 189390, - "name": "yukon" - }, - { - "id": 207928, - "name": "19th century" - }, - { - "id": 259987, - "name": "cgi animation" - }, - { - "id": 263806, - "name": "1890s" - } - ], - "production_companies": [ - { - "id": 787, - "name": "3 Arts Entertainment", - "origin_country": "US" - }, - { - "id": 127928, - "name": "20th Century Studios", - "origin_country": "US" - }, - { - "id": 22213, - "name": "TSG Entertainment", - 
"origin_country": "US" - } - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - { - "id": 282, - "name": "video game" - }, - { - "id": 6054, - "name": "friendship" - }, - { - "id": 10842, - "name": "good vs evil" - }, - { - "id": 41645, - "name": "based on video game" - }, - { - "id": 167043, - "name": "road movie" - }, - { - "id": 172142, - "name": "farting" - }, - { - "id": 188933, - "name": "bar fight" - }, - { - "id": 226967, - "name": "amistad" - }, - { - "id": 245230, - "name": "live action remake" - }, - { - "id": 258111, - "name": "fantasy" - }, - { - "id": 260223, - "name": "videojuego" - } - ], - "production_companies": [ - { - "id": 333, - "name": "Original Film", - "origin_country": "US" - }, - { - "id": 10644, - "name": "Blur Studios", - "origin_country": "US" - }, - { - "id": 77884, - "name": "Marza Animation Planet", - "origin_country": "JP" - }, - { - "id": 4, - "name": "Paramount", - "origin_country": "US" - }, - { - "id": 113750, - "name": "SEGA", - "origin_country": "JP" - }, - { - "id": 100711, - "name": "DJ2 Entertainment", - "origin_country": "" - }, - { - "id": 24955, - "name": "Paramount Animation", - "origin_country": "US" - } - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": [ - { - "id": 849, - "name": "dc comics" - }, - { - "id": 9717, - "name": "based on comic" - }, - { - "id": 187056, - "name": "woman director" - }, - { - "id": 229266, - "name": "dc extended universe" - } - ], - "production_companies": [ - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 82968, - "name": "LuckyChap Entertainment", - "origin_country": "GB" - }, - { - "id": 103462, - "name": "Kroll & Co Entertainment", - "origin_country": "US" - }, - { - "id": 174, - "name": "Warner Bros. 
Pictures", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - }, - { - "id": 128064, - "name": "DC Films", - "origin_country": "US" - }, - { - "id": 101831, - "name": "Clubhouse Pictures", - "origin_country": "US" - } - ] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": [ - { - "id": 849, - "name": "dc comics" - } - ], - "production_companies": [ - { - "id": 2785, - "name": "Warner Bros. Animation", - "origin_country": "US" - }, - { - "id": 9993, - "name": "DC Entertainment", - "origin_country": "US" - }, - { - "id": 429, - "name": "DC Comics", - "origin_country": "US" - } - ] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - { - "id": 1353, - "name": "underground" - }, - { - "id": 5318, - "name": "seoul" - }, - { - "id": 5732, - "name": "birthday party" - }, - { - "id": 5752, - "name": "private lessons" - }, - { - "id": 9866, - "name": "basement" - }, - { - "id": 10453, - "name": "con artist" - }, - { - "id": 11935, - "name": "working class" - }, - { - "id": 12565, - "name": "psychological thriller" - }, - { - "id": 13126, - "name": "limousine driver" - }, - { - "id": 14514, - "name": "class differences" - }, - { - "id": 14864, - "name": "rich poor" - }, - { - "id": 17997, - "name": "housekeeper" - }, - { - "id": 18015, - "name": "tutor" - }, - { - "id": 18035, - "name": "family" - }, - { - "id": 33421, - "name": "crime family" - }, - { - "id": 173272, - "name": "flood" - }, - { - "id": 188861, - "name": "smell" - }, - { - "id": 198673, - "name": "unemployed" - }, - { - "id": 237462, - "name": "wealthy family" - } - ], - "production_companies": [ - { - "id": 7036, - "name": "CJ Entertainment", - "origin_country": "KR" - }, - { - "id": 4399, - "name": "Barunson E&A", - "origin_country": "KR" - } - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": [ - { - "id": 161176, - "name": "space opera" - } - ], - "production_companies": [ - { - 
"id": 1, - "name": "Lucasfilm", - "origin_country": "US" - }, - { - "id": 11461, - "name": "Bad Robot", - "origin_country": "US" - }, - { - "id": 2, - "name": "Walt Disney Pictures", - "origin_country": "US" - }, - { - "id": 120404, - "name": "British Film Commission", - "origin_country": "" - } - ] - } -] -``` - ---- - -## Simple search_json call - -This query uses search_json to convert the keywords object array to a simple string array. The expression '[name]' tells the function to extract all values for the name attribute and wrap them in an array. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, rank, search_json('[name]', keywords) as keywords FROM movies.movie ORDER BY rank LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Ad Astra", - "rank": 1, - "keywords": [ - "moon", - "loss of loved one", - "planet mars", - "astronaut", - "moon colony", - "solar system", - "father son relationship", - "near future", - "planet neptune", - "space walk" - ] - }, - { - "title": "Extraction", - "rank": 2, - "keywords": [ - "mercenary", - "mumbai (bombay), india", - "based on comic", - "crime boss", - "rescue mission", - "based on graphic novel", - "dhaka (dacca), bangladesh" - ] - }, - { - "title": "To the Beat! 
Back 2 School", - "rank": 3, - "keywords": ["school"] - }, - { - "title": "Bloodshot", - "rank": 4, - "keywords": ["nanotechnology", "superhero", "based on comic", "psychotronic", "shared universe", "valiant comics"] - }, - { - "title": "The Call of the Wild", - "rank": 5, - "keywords": [ - "based on novel or book", - "gold rush", - "dog", - "sled dogs", - "yukon", - "19th century", - "cgi animation", - "1890s" - ] - }, - { - "title": "Sonic the Hedgehog", - "rank": 6, - "keywords": [ - "video game", - "friendship", - "good vs evil", - "based on video game", - "road movie", - "farting", - "bar fight", - "amistad", - "live action remake", - "fantasy", - "videojuego" - ] - }, - { - "title": "Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)", - "rank": 7, - "keywords": ["dc comics", "based on comic", "woman director", "dc extended universe"] - }, - { - "title": "Justice League Dark: Apokolips War", - "rank": 8, - "keywords": ["dc comics"] - }, - { - "title": "Parasite", - "rank": 9, - "keywords": [ - "underground", - "seoul", - "birthday party", - "private lessons", - "basement", - "con artist", - "working class", - "psychological thriller", - "limousine driver", - "class differences", - "rich poor", - "housekeeper", - "tutor", - "family", - "crime family", - "flood", - "smell", - "unemployed", - "wealthy family" - ] - }, - { - "title": "Star Wars: The Rise of Skywalker", - "rank": 10, - "keywords": ["space opera"] - } -] -``` - ---- - -## Use search_json in a where clause - -This example shows how we can use SEARCH_JSON to filter out records in a WHERE clause. The production_companies attribute holds an object array of companies that produced each movie, we want to only see movies which were produced by Marvel Studios. Our expression is a filter '$[name="Marvel Studios"]' this tells the function to iterate the production_companies array and only return entries where the name is "Marvel Studios". 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT title, release_date FROM movies.movie where search_json('$[name=\"Marvel Studios\"]', production_companies) IS NOT NULL ORDER BY release_date" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Iron Man", - "release_date": "2008-04-30" - }, - { - "title": "The Incredible Hulk", - "release_date": "2008-06-12" - }, - { - "title": "Iron Man 2", - "release_date": "2010-04-28" - }, - { - "title": "Thor", - "release_date": "2011-04-21" - }, - { - "title": "Captain America: The First Avenger", - "release_date": "2011-07-22" - }, - { - "title": "Marvel One-Shot: The Consultant", - "release_date": "2011-09-12" - }, - { - "title": "Marvel One-Shot: A Funny Thing Happened on the Way to Thor's Hammer", - "release_date": "2011-10-25" - }, - { - "title": "The Avengers", - "release_date": "2012-04-25" - }, - { - "title": "Marvel One-Shot: Item 47", - "release_date": "2012-09-13" - }, - { - "title": "Iron Man 3", - "release_date": "2013-04-18" - }, - { - "title": "Marvel One-Shot: Agent Carter", - "release_date": "2013-09-08" - }, - { - "title": "Thor: The Dark World", - "release_date": "2013-10-29" - }, - { - "title": "Marvel One-Shot: All Hail the King", - "release_date": "2014-02-04" - }, - { - "title": "Marvel Studios: Assembling a Universe", - "release_date": "2014-03-18" - }, - { - "title": "Captain America: The Winter Soldier", - "release_date": "2014-03-20" - }, - { - "title": "Guardians of the Galaxy", - "release_date": "2014-07-30" - }, - { - "title": "Avengers: Age of Ultron", - "release_date": "2015-04-22" - }, - { - "title": "Ant-Man", - "release_date": "2015-07-14" - }, - { - "title": "Captain America: Civil War", - "release_date": "2016-04-27" - }, - { - "title": "Team Thor", - "release_date": "2016-08-28" - }, - { - "title": "Doctor Strange", - "release_date": "2016-10-25" - }, - { - "title": "Guardians of the Galaxy Vol. 
2", - "release_date": "2017-04-19" - }, - { - "title": "Spider-Man: Homecoming", - "release_date": "2017-07-05" - }, - { - "title": "Thor: Ragnarok", - "release_date": "2017-10-25" - }, - { - "title": "Black Panther", - "release_date": "2018-02-13" - }, - { - "title": "Avengers: Infinity War", - "release_date": "2018-04-25" - }, - { - "title": "Ant-Man and the Wasp", - "release_date": "2018-07-04" - }, - { - "title": "Captain Marvel", - "release_date": "2019-03-06" - }, - { - "title": "Avengers: Endgame", - "release_date": "2019-04-24" - }, - { - "title": "Spider-Man: Far from Home", - "release_date": "2019-06-28" - }, - { - "title": "Black Widow", - "release_date": "2020-10-28" - }, - { - "title": "Untitled Spider-Man 3", - "release_date": "2021-11-04" - }, - { - "title": "Thor: Love and Thunder", - "release_date": "2022-02-10" - }, - { - "title": "Doctor Strange in the Multiverse of Madness", - "release_date": "2022-03-23" - }, - { - "title": "Untitled Marvel Project (3)", - "release_date": "2022-07-29" - }, - { - "title": "Guardians of the Galaxy Vol. 3", - "release_date": "2023-02-16" - } -] -``` - ---- - -## Use search_json to show the movies with the largest casts - -This example shows how we can use SEARCH_JSON to perform a simple calculation on JSON and order by the results. The cast attribute holds an object array of details around the cast of a movie. We use the expression '$count(id)' that counts each id and returns the value back which we alias in SQL as cast_size which in turn gets used to sort the rows. 
- -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT movie_title, search_json('$count(id)', `cast`) as cast_size FROM movies.credits ORDER BY cast_size DESC LIMIT 10" -} -``` - -### Response: 200 - -```json -[ - { - "movie_title": "Around the World in Eighty Days", - "cast_size": 312 - }, - { - "movie_title": "And the Oscar Goes To...", - "cast_size": 259 - }, - { - "movie_title": "Rock of Ages", - "cast_size": 223 - }, - { - "movie_title": "Mr. Smith Goes to Washington", - "cast_size": 213 - }, - { - "movie_title": "Les Misérables", - "cast_size": 208 - }, - { - "movie_title": "Jason Bourne", - "cast_size": 201 - }, - { - "movie_title": "The Muppets", - "cast_size": 191 - }, - { - "movie_title": "You Don't Mess with the Zohan", - "cast_size": 183 - }, - { - "movie_title": "The Irishman", - "cast_size": 173 - }, - { - "movie_title": "Spider-Man: Far from Home", - "cast_size": 173 - } -] -``` - ---- - -## search_json as a condition, in a select with a table join - -This example shows how we can use SEARCH_JSON to find movies where at least of 2 our favorite actors from Marvel films have acted together then list the movie, its overview, release date, and the actors names and their characters. The WHERE clause performs a count on credits.cast attribute that have the matching actors. The SELECT performs the same filter on the cast attribute and performs a transform on each object to just return the actor's name and their character. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT m.title, m.overview, m.release_date, search_json('$[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. 
Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]].{\"actor\": name, \"character\": character}', c.`cast`) as characters FROM movies.credits c INNER JOIN movies.movie m ON c.movie_id = m.id WHERE search_json('$count($[name in [\"Robert Downey Jr.\", \"Chris Evans\", \"Scarlett Johansson\", \"Mark Ruffalo\", \"Chris Hemsworth\", \"Jeremy Renner\", \"Clark Gregg\", \"Samuel L. Jackson\", \"Gwyneth Paltrow\", \"Don Cheadle\"]])', c.`cast`) >= 2" -} -``` - -### Response: 200 - -```json -[ - { - "title": "Out of Sight", - "overview": "Meet Jack Foley, a smooth criminal who bends the law and is determined to make one last heist. Karen Sisco is a federal marshal who chooses all the right moves … and all the wrong guys. Now they're willing to risk it all to find out if there's more between them than just the law.", - "release_date": "1998-06-26", - "characters": [ - { - "actor": "Don Cheadle", - "character": "Maurice Miller" - }, - { - "actor": "Samuel L. Jackson", - "character": "Hejira Henry (uncredited)" - } - ] - }, - { - "title": "Iron Man", - "overview": "After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.", - "release_date": "2008-04-30", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Captain America: The First Avenger", - "overview": "During World War II, Steve Rogers is a sickly man from Brooklyn who's transformed into super-soldier Captain America to aid in the war effort. 
Rogers must stop the Red Skull – Adolf Hitler's ruthless head of weaponry, and the leader of an organization that intends to use a mysterious device of untold powers for world domination.", - "release_date": "2011-07-22", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "In Good Company", - "overview": "Dan Foreman is a seasoned advertisement sales executive at a high-ranking publication when a corporate takeover results in him being placed under naive supervisor Carter Duryea, who is half his age. Matters are made worse when Dan's new supervisor becomes romantically involved with his daughter an 18 year-old college student Alex.", - "release_date": "2004-12-29", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Alex Foreman" - }, - { - "actor": "Clark Gregg", - "character": "Mark Steckle" - } - ] - }, - { - "title": "Zodiac", - "overview": "The true story of the investigation of the \"Zodiac Killer\", a serial killer who terrified the San Francisco Bay Area, taunting police with his ciphers and letters. The case becomes an obsession for three men as their lives and careers are built and destroyed by the endless trail of clues.", - "release_date": "2007-03-02", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Dave Toschi" - }, - { - "actor": "Robert Downey Jr.", - "character": "Paul Avery" - } - ] - }, - { - "title": "Hard Eight", - "overview": "A stranger mentors a young Reno gambler who weds a hooker and befriends a vulgar casino regular.", - "release_date": "1996-02-28", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Clementine" - }, - { - "actor": "Samuel L. Jackson", - "character": "Jimmy" - } - ] - }, - { - "title": "The Spirit", - "overview": "Down these mean streets a man must come. A hero born, murdered, and born again. 
A Rookie cop named Denny Colt returns from the beyond as The Spirit, a hero whose mission is to fight against the bad forces from the shadows of Central City. The Octopus, who kills anyone unfortunate enough to see his face, has other plans; he is going to wipe out the entire city.", - "release_date": "2008-12-25", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Silken Floss" - }, - { - "actor": "Samuel L. Jackson", - "character": "Octopuss" - } - ] - }, - { - "title": "S.W.A.T.", - "overview": "Hondo Harrelson recruits Jim Street to join an elite unit of the Los Angeles Police Department. Together they seek out more members, including tough Deke Kay and single mom Chris Sanchez. The team's first big assignment is to escort crime boss Alex Montel to prison. It seems routine, but when Montel offers a huge reward to anyone who can break him free, criminals of various stripes step up for the prize.", - "release_date": "2003-08-08", - "characters": [ - { - "actor": "Samuel L. Jackson", - "character": "Sgt. Dan 'Hondo' Harrelson" - }, - { - "actor": "Jeremy Renner", - "character": "Brian Gamble" - } - ] - }, - { - "title": "Iron Man 2", - "overview": "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", - "release_date": "2010-04-28", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Scarlett Johansson", - "character": "Natalie Rushman / Natasha Romanoff / Black Widow" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - } - ] - }, - { - "title": "Thor", - "overview": "Against his father Odin's will, The Mighty Thor - a powerful but arrogant warrior god - recklessly reignites an ancient war. Thor is cast down to Earth and forced to live among humans as punishment. Once here, Thor learns what it takes to be a true hero when the most dangerous villain of his world sends the darkest forces of Asgard to invade Earth.", - "release_date": "2011-04-21", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye (uncredited)" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - } - ] - }, - { - "title": "View from the Top", - "overview": "A small-town woman tries to achieve her goal of becoming a flight attendant.", - "release_date": "2003-03-21", - "characters": [ - { - "actor": "Gwyneth Paltrow", - "character": "Donna" - }, - { - "actor": "Mark Ruffalo", - "character": "Ted Stewart" - } - ] - }, - { - "title": "The Nanny Diaries", - "overview": "A college graduate goes to work as a nanny for a rich New York family. 
Ensconced in their home, she has to juggle their dysfunction, a new romance, and the spoiled brat in her charge.", - "release_date": "2007-08-24", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Annie Braddock" - }, - { - "actor": "Chris Evans", - "character": "Hayden \"Harvard Hottie\"" - } - ] - }, - { - "title": "The Perfect Score", - "overview": "Six high school seniors decide to break into the Princeton Testing Center so they can steal the answers to their upcoming SAT tests and all get perfect scores.", - "release_date": "2004-01-30", - "characters": [ - { - "actor": "Chris Evans", - "character": "Kyle" - }, - { - "actor": "Scarlett Johansson", - "character": "Francesca Curtis" - } - ] - }, - { - "title": "The Avengers", - "overview": "When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!", - "release_date": "2012-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - } - ] - }, - { - "title": "Iron Man 3", - "overview": "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", - "release_date": "2013-04-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / Iron Patriot" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner (uncredited)" - } - ] - }, - { - "title": "Marvel One-Shot: The Consultant", - "overview": "Agent Coulson informs Agent Sitwell that the World Security Council wishes Emil Blonsky to be released from prison to join the Avengers Initiative. As Nick Fury doesn't want to release Blonsky, the two agents decide to send a patsy to sabotage the meeting...", - "release_date": "2011-09-12", - "characters": [ - { - "actor": "Clark Gregg", - "character": "Phil Coulson" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark (archive footage)" - } - ] - }, - { - "title": "Thor: The Dark World", - "overview": "Thor fights to restore order across the cosmos… but an ancient race led by the vengeful Malekith returns to plunge the universe back into darkness. 
Faced with an enemy that even Odin and Asgard cannot withstand, Thor must embark on his most perilous and personal journey yet, one that will reunite him with Jane Foster and force him to sacrifice everything to save us all.", - "release_date": "2013-10-29", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Loki as Captain America (uncredited)" - } - ] - }, - { - "title": "Avengers: Age of Ultron", - "overview": "When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure.", - "release_date": "2015-04-22", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - } - ] - }, - { - "title": "Captain America: The Winter Soldier", - "overview": "After the cataclysmic events in New York with The Avengers, Steve Rogers, aka Captain America is living quietly in Washington, D.C. and trying to adjust to the modern world. But when a S.H.I.E.L.D. colleague comes under attack, Steve becomes embroiled in a web of intrigue that threatens to put the world at risk. 
Joining forces with the Black Widow, Captain America struggles to expose the ever-widening conspiracy while fighting off professional assassins sent to silence him at every turn. When the full scope of the villainous plot is revealed, Captain America and the Black Widow enlist the help of a new ally, the Falcon. However, they soon find themselves up against an unexpected and formidable enemy—the Winter Soldier.", - "release_date": "2014-03-20", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - } - ] - }, - { - "title": "Thanks for Sharing", - "overview": "A romantic comedy that brings together three disparate characters who are learning to face a challenging and often confusing world as they struggle together against a common demon—sex addiction.", - "release_date": "2013-09-19", - "characters": [ - { - "actor": "Mark Ruffalo", - "character": "Adam" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Phoebe" - } - ] - }, - { - "title": "Chef", - "overview": "When Chef Carl Casper suddenly quits his job at a prominent Los Angeles restaurant after refusing to compromise his creative integrity for its controlling owner, he is left to figure out what's next. Finding himself in Miami, he teams up with his ex-wife, his friend and his son to launch a food truck. 
Taking to the road, Chef Carl goes back to his roots to reignite his passion for the kitchen -- and zest for life and love.", - "release_date": "2014-05-08", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Molly" - }, - { - "actor": "Robert Downey Jr.", - "character": "Marvin" - } - ] - }, - { - "title": "Marvel Studios: Assembling a Universe", - "overview": "A look at the story behind Marvel Studios and the Marvel Cinematic Universe, featuring interviews and behind-the-scenes footage from all of the Marvel films, the Marvel One-Shots and \"Marvel's Agents of S.H.I.E.L.D.\"", - "release_date": "2014-03-18", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Himself / Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Himself / Thor" - }, - { - "actor": "Chris Evans", - "character": "Himself / Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Himself / Bruce Banner / Hulk" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Herself" - }, - { - "actor": "Clark Gregg", - "character": "Himself" - }, - { - "actor": "Samuel L. Jackson", - "character": "Himself" - }, - { - "actor": "Scarlett Johansson", - "character": "Herself" - }, - { - "actor": "Jeremy Renner", - "character": "Himself" - } - ] - }, - { - "title": "Captain America: Civil War", - "overview": "Following the events of Age of Ultron, the collective governments of the world pass an act designed to regulate all superhuman activity. 
This polarizes opinion amongst the Avengers, causing two factions to side with Iron Man or Captain America, which causes an epic battle between former allies.", - "release_date": "2016-04-27", - "characters": [ - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - } - ] - }, - { - "title": "Thor: Ragnarok", - "overview": "Thor is imprisoned on the other side of the universe and finds himself in a race against time to get back to Asgard to stop Ragnarok, the destruction of his home-world and the end of Asgardian civilization, at the hands of an all-powerful new threat, the ruthless Hela.", - "release_date": "2017-10-25", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (archive footage / uncredited)" - } - ] - }, - { - "title": "Avengers: Endgame", - "overview": "After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts of the Mad Titan, Thanos. 
With the help of remaining allies, the Avengers must assemble once more in order to undo Thanos' actions and restore order to the universe once and for all, no matter what consequences may be in store.", - "release_date": "2019-04-24", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / Hulk" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Jeremy Renner", - "character": "Clint Barton / Hawkeye" - }, - { - "actor": "Don Cheadle", - "character": "James Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Pepper Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury" - } - ] - }, - { - "title": "Avengers: Infinity War", - "overview": "As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. 
Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain.", - "release_date": "2018-04-25", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Don Cheadle", - "character": "James \"Rhodey\" Rhodes / War Machine" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Samuel L. Jackson", - "character": "Nick Fury (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } - ] - }, - { - "title": "Captain Marvel", - "overview": "The story follows Carol Danvers as she becomes one of the universe’s most powerful heroes when Earth is caught in the middle of a galactic war between two alien races. Set in the 1990s, Captain Marvel is an all-new adventure from a previously unseen period in the history of the Marvel Cinematic Universe.", - "release_date": "2019-03-06", - "characters": [ - { - "actor": "Samuel L. 
Jackson", - "character": "Nick Fury" - }, - { - "actor": "Clark Gregg", - "character": "Agent Phil Coulson" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America (uncredited)" - }, - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow (uncredited)" - }, - { - "actor": "Don Cheadle", - "character": "James 'Rhodey' Rhodes / War Machine (uncredited)" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk (uncredited)" - } - ] - }, - { - "title": "Spider-Man: Homecoming", - "overview": "Following the events of Captain America: Civil War, Peter Parker, with the help of his mentor Tony Stark, tries to balance his life as an ordinary high school student in Queens, New York City, with fighting crime as his superhero alter ego Spider-Man as a new threat, the Vulture, emerges.", - "release_date": "2017-07-05", - "characters": [ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Gwyneth Paltrow", - "character": "Virginia \"Pepper\" Potts" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - } - ] - }, - { - "title": "Team Thor", - "overview": "Discover what Thor was up to during the events of Captain America: Civil War.", - "release_date": "2016-08-28", - "characters": [ - { - "actor": "Chris Hemsworth", - "character": "Thor Odinson" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner" - } - ] - }, - { - "title": "Black Widow", - "overview": "Natasha Romanoff, also known as Black Widow, confronts the darker parts of her ledger when a dangerous conspiracy with ties to her past arises. 
Pursued by a force that will stop at nothing to bring her down, Natasha must deal with her history as a spy and the broken relationships left in her wake long before she became an Avenger.", - "release_date": "2020-10-28", - "characters": [ - { - "actor": "Scarlett Johansson", - "character": "Natasha Romanoff / Black Widow" - }, - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - } - ] - } -] -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/analytics.md b/versioned_docs/version-4.7/developers/operations-api/analytics.md deleted file mode 100644 index 470d4066..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/analytics.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: Analytics Operations ---- - -# Analytics Operations - -## get_analytics - -Retrieves analytics data from the server. - -- `operation` _(required)_ - must always be `get_analytics` -- `metric` _(required)_ - any value returned by `list_metrics` -- `start_time` _(optional)_ - Unix timestamp in milliseconds -- `end_time` _(optional)_ - Unix timestamp in milliseconds -- `get_attributes` _(optional)_ - array of attribute names to retrieve -- `conditions` _(optional)_ - array of conditions to filter results (see [search_by_conditions docs](./nosql-operations) for details) - -### Body - -```json -{ - "operation": "get_analytics", - "metric": "resource-usage", - "start_time": 1769198332754, - "end_time": 1769198532754, - "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], - "conditions": [ - { - "attribute": "node", - "operator": "equals", - "value": "node1.example.com" - } - ] -} -``` - -### Response 200 - -```json -[ - { - "id": "12345", - "metric": "resource-usage", - "userCPUTime": 100, - "systemCPUTime": 50 - }, - { - "id": "67890", - "metric": "resource-usage", - "userCPUTime": 150, - "systemCPUTime": 75 - } -] -``` - -## list_metrics - -Returns a list of available metrics that can be queried. 
- -- `operation` _(required)_ - must always be `list_metrics` -- `metric_types` _(optional)_ - array of metric types to filter results; one or both of `custom` and `builtin`; default is `builtin` - -### Body - -```json -{ - "operation": "list_metrics", - "metric_types": ["custom", "builtin"] -} -``` - -### Response 200 - -```json -["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] -``` - -## describe_metric - -Provides detailed information about a specific metric, including its structure and available parameters. - -- `operation` _(required)_ - must always be `describe_metric` -- `metric` _(required)_ - name of the metric to describe - -### Body - -```json -{ - "operation": "describe_metric", - "metric": "resource-usage" -} -``` - -### Response 200 - -```json -{ - "attributes": [ - { - "name": "id", - "type": "number" - }, - { - "name": "metric", - "type": "string" - }, - { - "name": "userCPUTime", - "type": "number" - }, - { - "name": "systemCPUTime", - "type": "number" - }, - { - "name": "node", - "type": "string" - } - ] -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md b/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md deleted file mode 100644 index b6714552..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/bulk-operations.md +++ /dev/null @@ -1,255 +0,0 @@ ---- -title: Bulk Operations ---- - -# Bulk Operations - -## Export Local - -Exports data based on a given search operation to a local file in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_local` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `path` _(required)_ - path local to the server to export the data -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` -- `filename` _(optional)_ - the name of the file where your export will be written to (do not include extension in filename). If one is not provided it will be autogenerated based on the epoch. - -### Body - -```json -{ - "operation": "export_local", - "format": "json", - "path": "/data/", - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.breed" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 6fc18eaa-3504-4374-815c-44840a12e7e5" -} -``` - ---- - -## CSV Data Load - -Ingests CSV data, provided directly in the operation as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_data_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `data` _(required)_ - csv data to import into Harper - -### Body - -```json -{ - "operation": "csv_data_load", - "database": "dev", - "action": "insert", - "table": "breed", - "data": "id,name,section,country,image\n1,ENGLISH POINTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/001g07.jpg\n2,ENGLISH SETTER,British and Irish Pointers and Setters,GREAT BRITAIN,https://www.fci.be/Nomenclature/Illustrations/002g07.jpg\n3,KERRY BLUE TERRIER,Large and medium sized Terriers,IRELAND,\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", - "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" -} -``` - ---- - -## CSV File Load - -Ingests CSV data, provided via a path on the local filesystem, as an `insert`, `update` or `upsert` into the specified database table. - -_Note: The CSV file must reside on the same machine on which Harper is running. For example, the path to a CSV on your computer will produce an error if your Harper instance is a cloud instance._ - -- `operation` _(required)_ - must always be `csv_file_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `file_path` _(required)_ - path to the csv file on the host running Harper - -### Body - -```json -{ - "operation": "csv_file_load", - "action": "insert", - "database": "dev", - "table": "breed", - "file_path": "/home/user/imports/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 3994d8e2-ec6a-43c4-8563-11c1df81870e", - "job_id": "3994d8e2-ec6a-43c4-8563-11c1df81870e" -} -``` - ---- - -## CSV URL Load - -Ingests CSV data, provided via URL, as an `insert`, `update` or `upsert` into the specified database table. - -- `operation` _(required)_ - must always be `csv_url_load` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `csv_url` _(required)_ - URL to the csv - -### Body - -```json -{ - "operation": "csv_url_load", - "action": "insert", - "database": "dev", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 332aa0a2-6833-46cd-88a6-ae375920436a", - "job_id": "332aa0a2-6833-46cd-88a6-ae375920436a" -} -``` - ---- - -## Export To S3 - -Exports data based on a given search operation from table to AWS S3 in JSON or CSV format. 
- -- `operation` _(required)_ - must always be `export_to_s3` -- `format` _(required)_ - the format you wish to export the data, options are `json` & `csv` -- `s3` _(required)_ - details your access keys, bucket, bucket region and key for saving the data to S3 -- `search_operation` _(required)_ - search_operation of `search_by_hash`, `search_by_value`, `search_by_conditions` or `sql` - -### Body - -```json -{ - "operation": "export_to_s3", - "format": "json", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - }, - "search_operation": { - "operation": "sql", - "sql": "SELECT * FROM dev.dog" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 9fa85968-4cb1-4008-976e-506c4b13fc4a", - "job_id": "9fa85968-4cb1-4008-976e-506c4b13fc4a" -} -``` - ---- - -## Import from S3 - -This operation allows users to import CSV or JSON files from an AWS S3 bucket as an `insert`, `update` or `upsert`. - -- `operation` _(required)_ - must always be `import_from_s3` -- `action` _(optional)_ - type of action you want to perform - `insert`, `update` or `upsert`. The default is `insert` -- `database` _(optional)_ - name of the database where you are loading your data. 
The default is `data` -- `table` _(required)_ - name of the table where you are loading your data -- `s3` _(required)_ - object containing required AWS S3 bucket info for operation: - - `aws_access_key_id` - AWS access key for authenticating into your S3 bucket - - `aws_secret_access_key` - AWS secret for authenticating into your S3 bucket - - `bucket` - AWS S3 bucket to import from - - `key` - the name of the file to import - _the file must include a valid file extension ('.csv' or '.json')_ - - `region` - the region of the bucket - -### Body - -```json -{ - "operation": "import_from_s3", - "action": "insert", - "database": "dev", - "table": "dog", - "s3": { - "aws_access_key_id": "YOUR_KEY", - "aws_secret_access_key": "YOUR_SECRET_KEY", - "bucket": "BUCKET_NAME", - "key": "OBJECT_NAME", - "region": "BUCKET_REGION" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 062a1892-6a0a-4282-9791-0f4c93b12e16", - "job_id": "062a1892-6a0a-4282-9791-0f4c93b12e16" -} -``` - ---- - -## Delete Records Before - -Delete data before the specified timestamp on the specified database table exclusively on the node where it is executed. Any clustered nodes with replicated data will retain that data. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_records_before` -- `date` _(required)_ - records older than this date will be deleted. 
Supported format looks like: `YYYY-MM-DDThh:mm:ss.sZ` -- `schema` _(required)_ - name of the schema where you are deleting your data -- `table` _(required)_ - name of the table where you are deleting your data - -### Body - -```json -{ - "operation": "delete_records_before", - "date": "2021-01-25T23:05:27.464", - "schema": "dev", - "table": "breed" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id d3aed926-e9fe-4ec1-aea7-0fb4451bd373", - "job_id": "d3aed926-e9fe-4ec1-aea7-0fb4451bd373" -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/certificate-management.md b/versioned_docs/version-4.7/developers/operations-api/certificate-management.md deleted file mode 100644 index f8eea402..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/certificate-management.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Add Certificate - -Adds or updates a certificate in the `hdb_certificate` system table. -If a `private_key` is provided it will **not** be stored in `hdb_certificate`, it will be written to file in `/keys/`. -If a `private_key` is not passed the operation will search for one that matches the certificate. If one is not found an error will be returned. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_certificate` -- `name` _(required)_ - a unique name for the certificate -- `certificate` _(required)_ - a PEM formatted certificate string -- `is_authority` _(required)_ - a boolean indicating if the certificate is a certificate authority -- `hosts` _(optional)_ - an array of hostnames that the certificate is valid for -- `private_key` _(optional)_ - a PEM formatted private key string - -### Body - -```json -{ - "operation": "add_certificate", - "name": "my-cert", - "certificate": "-----BEGIN CERTIFICATE-----ZDFAay... 
-----END CERTIFICATE-----", - "is_authority": false, - "private_key": "-----BEGIN RSA PRIVATE KEY-----Y4dMpw5f... -----END RSA PRIVATE KEY-----" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added certificate: my-cert" -} -``` - ---- - -## Remove Certificate - -Removes a certificate from the `hdb_certificate` system table and deletes the corresponding private key file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_certificate` -- `name` _(required)_ - the name of the certificate - -### Body - -```json -{ - "operation": "remove_certificate", - "name": "my-cert" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed my-cert" -} -``` - ---- - -## List Certificates - -Lists all certificates in the `hdb_certificate` system table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_certificates` - -### Body - -```json -{ - "operation": "list_certificates" -} -``` - -### Response: 200 - -```json -[ - { - "name": "HarperDB-Certificate-Authority-node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\nTANBgkqhk... S34==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": true, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "serial_number": "5235345", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - }, - { - "name": "node1", - "certificate": "-----BEGIN CERTIFICATE-----\r\ngIEcSR1M... 
5bv==\r\n-----END CERTIFICATE-----\r\n", - "private_key_name": "privateKey.pem", - "is_authority": false, - "details": { - "issuer": "CN=HarperDB-Certificate-Authority-node1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject": "CN=node.1 C=USA ST=Colorado L=Denver O=HarperDB\\, Inc.", - "subject_alt_name": "IP Address:127.0.0.1, DNS:localhost, IP Address:0:0:0:0:0:0:0:1, DNS:node.1", - "serial_number": "5243646", - "valid_from": "Aug 27 15:00:00 2024 GMT", - "valid_to": "Aug 25 15:00:00 2034 GMT" - }, - "is_self_signed": true, - "uses": ["https", "wss"] - } -] -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md b/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md deleted file mode 100644 index 0ba3af74..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/clustering-nats.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: Clustering using NATS ---- - -# Clustering using NATS - -## Cluster Set Routes - -Adds a route/routes to either the hub or leaf server cluster configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `server` _(required)_ - must always be `hub` or `leaf`, in most cases you should use `hub` here -- `routes` _(required)_ - must always be an object array with a host and port: - - `host` - the host of the remote instance you are clustering to - - `port` - the clustering port of the remote instance you are clustering to, in most cases this is the value in `clustering.hubServer.cluster.network.port` on the remote instance `harperdb-config.yaml` - -### Body - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets all the hub and leaf server routes from the config file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -{ - "hub": [ - { - "host": "3.22.181.22", - "port": 12345 - }, - { - "host": "3.137.184.8", - "port": 12345 - }, - { - "host": "18.223.239.195", - "port": 12345 - }, - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "leaf": [] -} -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from hub and/or leaf server routes array in config file. Returns a deletion success message and arrays of deleted and skipped records. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "host": "18.116.24.71", - "port": 12345 - } - ], - "skipped": [] -} -``` - ---- - -## Add Node - -Registers an additional Harper instance with associated subscriptions. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `node_name` _(required)_ - the node name of the remote node -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "add_node", - "node_name": "ec2-3-22-181-22", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": false, - "publish": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'ec2-3-22-181-22' to manifest" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance registration and associated subscriptions. 
This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_node` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `schema`, `table`, `subscribe` and `publish`: - - `schema` - the schema to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - - `start_time` _(optional)_ - How far back to go to get transactions from node being added. Must be in UTC YYYY-MM-DDTHH:mm:ss.sssZ format - -### Body - -```json -{ - "operation": "update_node", - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Set Node Replication - -A more adeptly named alias for add and update node. This operation behaves as a PATCH/upsert, meaning it will insert or update the specified replication configurations while leaving other table replication configuration untouched. The `database` (aka `schema`) parameter is optional, it will default to `data`. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_node_replication` -- `node_name` _(required)_ - the node name of the remote node you are updating -- `subscriptions` _(required)_ - The relationship created between nodes. Must be an object array and include `table`, `subscribe` and `publish`: - - `database` _(optional)_ - the database to replicate from - - `table` _(required)_ - the table to replicate from - - `subscribe` _(required)_ - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` _(required)_ - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "set_node_replication", - "node_name": "node1", - "subscriptions": [ - { - "table": "dog", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'ec2-3-22-181-22'" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. A status object will contain the clustering node name, whether or not clustering is enabled, and a list of possible connections. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "node_name": "ec2-18-221-143-69", - "is_enabled": true, - "connections": [ - { - "node_name": "ec2-3-22-181-22", - "status": "open", - "ports": { - "clustering": 12345, - "operations_api": 9925 - }, - "latency_ms": 13, - "uptime": "30d 1h 18m 8s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - } - ] -} -``` - ---- - -## Cluster Network - -Returns an object array of enmeshed nodes. 
Each node object will contain the name of the node, the amount of time (in milliseconds) it took for it to respond, the names of the nodes it is enmeshed with and the routes set in its config file. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_- must always be `cluster_network` -- `timeout` _(optional)_ - the amount of time in milliseconds to wait for a response from the network. Must be a number -- `connected_nodes` _(optional)_ - omit `connected_nodes` from the response. Must be a boolean. Defaults to `false` -- `routes` _(optional)_ - omit `routes` from the response. Must be a boolean. Defaults to `false` - -### Body - -```json -{ - "operation": "cluster_network" -} -``` - -### Response: 200 - -```json -{ - "nodes": [ - { - "name": "local_node", - "response_time": 4, - "connected_nodes": ["ec2-3-142-255-78"], - "routes": [ - { - "host": "3.142.255.78", - "port": 9932 - } - ] - }, - { - "name": "ec2-3-142-255-78", - "response_time": 57, - "connected_nodes": ["ec2-3-12-153-124", "ec2-3-139-236-138", "local_node"], - "routes": [] - } - ] -} -``` - ---- - -## Remove Node - -Removes a Harper instance and associated subscriptions from the cluster. Learn more about [Harper clustering here](../../reference/clustering). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are de-registering - -### Body - -```json -{ - "operation": "remove_node", - "node_name": "ec2-3-22-181-22" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'ec2-3-22-181-22' from manifest" -} -``` - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. Resets and replaces any existing clustering setup. -Learn more about [Harper clustering here](../../reference/clustering). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object containing `node_name` and `subscriptions` for that node - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "ec2-3-137-184-8", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": false - } - ] - }, - { - "node_name": "ec2-18-223-239-195", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Purge Stream - -Will purge messages from a stream - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `purge_stream` -- `database` _(required)_ - the name of the database where the streams table resides -- `table` _(required)_ - the name of the table that belongs to the stream -- `options` _(optional)_ - control how many messages get purged. Options are: - - `keep` - purge will keep this many most recent messages - - `seq` - purge all messages up to, but not including, this sequence - -### Body - -```json -{ - "operation": "purge_stream", - "database": "dev", - "table": "dog", - "options": { - "keep": 100 - } -} -``` - ---- diff --git a/versioned_docs/version-4.7/developers/operations-api/clustering.md b/versioned_docs/version-4.7/developers/operations-api/clustering.md deleted file mode 100644 index b040e323..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/clustering.md +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: Clustering ---- - -# Clustering - -The following operations are available for configuring and managing [Harper replication](../replication/). 
- -_**If you are using NATS for clustering, please see the**_ [_**NATS Clustering Operations**_](clustering-nats) _**documentation.**_ - -## Add Node - -Adds a new Harper instance to the cluster. If `subscriptions` are provided, it will also create the replication relationships between the nodes. If they are not provided a fully replicating system will be created. [Learn more about adding nodes here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_node` -- `hostname` or `url` _(required)_ - one of these fields is required. You must provide either the `hostname` or the `url` of the node you want to add -- `verify_tls` _(optional)_ - a boolean which determines if the TLS certificate should be verified. This will allow the Harper default self-signed certificates to be accepted. Defaults to `true` -- `authorization` _(optional)_ - an object or a string which contains the authorization information for the node being added. If it is an object, it should contain `username` and `password` fields. If it is a string, it should use HTTP `Authorization` style credentials -- `retain_authorization` _(optional)_ - a boolean which determines if the authorization credentials should be retained/stored and used every time a connection is made to this node. If `true`, the authorization will be stored on the node record. Generally this should not be used, as mTLS/certificate based authorization is much more secure and safe, and avoids the need for storing credentials. Defaults to `false`. -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(optional)_ - The relationship created between nodes. 
If not provided a fully replicated cluster will be set up. Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate - - `table` - the table to replicate - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added 'server-two' to cluster" -} -``` - ---- - -## Update Node - -Modifies an existing Harper instance in the cluster. - -_Operation is restricted to super_user roles only_ - -_Note: will attempt to add the node if it does not exist_ - -- `operation` _(required)_ - must always be `update_node` -- `hostname` _(required)_ - the `hostname` of the remote node you are updating -- `revoked_certificates` _(optional)_ - an array of revoked certificates serial numbers. If a certificate is revoked, it will not be accepted for any connections. -- `shard` _(optional)_ - a number which can be used to indicate which shard this node belongs to. This is only needed if you are using sharding. -- `subscriptions` _(required)_ - The relationship created between nodes. 
Must be an object array and include `database`, `table`, `subscribe` and `publish`: - - `database` - the database to replicate from - - `table` - the table to replicate from - - `subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table - - `publish` - a boolean which determines if transactions on the local table should be replicated on the remote table - -### Body - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "subscribe": true, - "publish": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated 'server-two'" -} -``` - ---- - -## Remove Node - -Removes a Harper node from the cluster and stops replication, [Learn more about remove node here](../replication/). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `remove_node` -- `name` _(required)_ - The name of the node you are removing - -### Body - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully removed 'server-two' from cluster" -} -``` - ---- - -## Cluster Status - -Returns an array of status objects from a cluster. - -`database_sockets` shows the actual websocket connections that exist between nodes. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_status` - -### Body - -```json -{ - "operation": "cluster_status" -} -``` - -### Response: 200 - -```json -{ - "type": "cluster-status", - "connections": [ - { - "replicateByDefault": true, - "replicates": true, - "url": "wss://server-2.domain.com:9933", - "name": "server-2.domain.com", - "subscriptions": null, - "database_sockets": [ - { - "database": "data", - "connected": true, - "latency": 0.7, - "thread_id": 1, - "nodes": ["server-2.domain.com"], - "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", - "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", - "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", - "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" - } - ] - } - ], - "node_name": "server-1.domain.com", - "is_enabled": true -} -``` - -There is a separate socket for each database for each node. Each node is represented in the connections array, and each database connection to that node is represented in the `database_sockets` array. Additional timing statistics include: - -- `lastCommitConfirmed`: When a commit is sent out, it should receive a confirmation from the remote server; this is the last receipt of confirmation of an outgoing commit. -- `lastReceivedRemoteTime`: This is the timestamp of the transaction that was last received. The timestamp is from when the original transaction occurred. -- `lastReceivedLocalTime`: This is local time when the last transaction was received. If there is a difference between this and `lastReceivedRemoteTime`, it means there is a delay from the original transaction to receiving it and so it is probably catching-up/behind. -- `sendingMessage`: The timestamp of the transaction that is actively being sent. This won't exist if the replicator is waiting for the next transaction to send. - ---- - -## Configure Cluster - -Bulk create/remove subscriptions for any number of remote nodes. 
Resets and replaces any existing clustering setup. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `configure_cluster` -- `connections` _(required)_ - must be an object array with each object following the `add_node` schema. - -### Body - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password2" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "my-table", - "subscribe": true, - "publish": false - } - ] - }, - { - "hostname": "server-three", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password3" - }, - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "subscribe": true, - "publish": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "Cluster successfully configured." -} -``` - ---- - -## Cluster Set Routes - -Adds a route/routes to the `replication.routes` configuration. This operation behaves as a PATCH/upsert, meaning it will add new routes to the configuration while leaving existing routes untouched. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_set_routes` -- `routes` _(required)_ - the routes field is an array that specifies the routes for clustering. Each element in the array can be either a string or an object with `hostname` and `port` properties. - -### Body - -```json -{ - "operation": "cluster_set_routes", - "routes": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully set", - "set": [ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` - ---- - -## Cluster Get Routes - -Gets the replication routes from the Harper config file. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_get_routes` - -### Body - -```json -{ - "operation": "cluster_get_routes" -} -``` - -### Response: 200 - -```json -[ - "wss://server-two:9925", - { - "hostname": "server-three", - "port": 9930 - } -] -``` - ---- - -## Cluster Delete Routes - -Removes route(s) from the Harper config file. Returns a deletion success message and arrays of deleted and skipped records. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `cluster_delete_routes` -- `routes` _(required)_ - Must be an array of route object(s) - -### Body - -```json -{ - "operation": "cluster_delete_routes", - "routes": [ - { - "hostname": "server-three", - "port": 9930 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "cluster routes successfully deleted", - "deleted": [ - { - "hostname": "server-three", - "port": 9930 - } - ], - "skipped": [] -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/components.md b/versioned_docs/version-4.7/developers/operations-api/components.md deleted file mode 100644 index 36d61986..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/components.md +++ /dev/null @@ -1,553 +0,0 @@ ---- -title: Components ---- - -# Components - -## Add Component - -Creates a new component project in the component root directory using a specified template (defaults to the [application template](https://github.com/HarperFast/application-template)). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_component` -- `project` _(required)_ - the name of the project you wish to create -- `template` _(optional)_ - the URL of a git repository to use as a template. Must be a string. Defaults to `https://github.com/HarperFast/application-template` -- `install_command` _(optional)_ - A command to use when installing the component. 
Must be a string. Defaults to `npm install`. Depending on the host environment, you can use this to switch to using an alternative package manager. -- `install_timeout` _(optional)_ - The maximum time, in milliseconds, to wait for the install command to complete. Must be a number. Defaults to `300000` (5 minutes) -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "add_component", - "project": "my-component" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully added project: my-component" -} -``` - ---- - -## Deploy Component - -Will deploy a component using either a base64-encoded string representation of a `.tar` file (the output from `package_component`) or a package value, which can be any valid NPM reference, such as a GitHub repo, an NPM package, a tarball, a local directory or a website. - -If deploying with the `payload` option, Harper will decode the base64-encoded string, reconstitute the .tar file of your project folder, and extract it to the component root project directory. - -If deploying with the `package` option, the package value will be written to `harperdb-config.yaml`. Then npm install will be utilized to install the component in the `node_modules` directory located in the hdb root. The value is a package reference, which should generally be a [URL reference, as described here](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#urls-as-dependencies) (it is also possible to include NPM registered packages and file paths). URL package references can directly reference tarballs that can be installed as a package. However, the most common and recommended usage is to install from a Git repository, which can be combined with a tag to deploy a specific version directly from versioned source control. 
Or you can use a GitHub token:

```
https://<your-token>@github.com/my-org/my-app#semver:v1.0.0
```

Or you can use a GitLab Project Access Token:

```
https://my-project:<your-token>@gitlab.com/my-group/my-project#semver:v1.0.0
```
Creates a temporary `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object containing the project name and the base64-encoded payload.
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `package_component` -- `project` _(required)_ - the name of the project you wish to package -- `skip_node_modules` _(optional)_ - if true, creates option for tar module that will exclude the project's node_modules directory. Must be a boolean - -### Body - -```json -{ - "operation": "package_component", - "project": "my-component", - "skip_node_modules": true -} -``` - -### Response: 200 - -```json -{ - "project": "my-component", - "payload": "LgAAAAAAAAAAAAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" -} -``` - ---- - -## Drop Component - -Deletes a file from inside the component project or deletes the complete project. - -**If just `project` is provided it will delete all that projects local files and folders** - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_component` -- `project` _(required)_ - the name of the project you wish to delete or to delete from if using the `file` parameter -- `file` _(optional)_ - the path relative to your project folder of the file you wish to delete -- `replicated` _(optional)_ - if true, Harper will replicate the component deletion to all nodes in the cluster. Must be a boolean. -- `restart` _(optional)_ - if true, Harper will restart after dropping the component. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_component", - "project": "my-component", - "file": "utils/myUtils.js" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully dropped: my-component/utils/myUtils.js" -} -``` - ---- - -## Get Components - -Gets all local component files and folders and any component config from `harperdb-config.yaml` - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_components` - -### Body - -```json -{ - "operation": "get_components" -} -``` - -### Response: 200 - -```json -{ - "name": "components", - "entries": [ - { - "package": "HarperDB/application-template", - "name": "deploy-test-gh" - }, - { - "package": "@fastify/compress", - "name": "fast-compress" - }, - { - "name": "my-component", - "entries": [ - { - "name": "LICENSE", - "mtime": "2023-08-22T16:00:40.286Z", - "size": 1070 - }, - { - "name": "index.md", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1207 - }, - { - "name": "config.yaml", - "mtime": "2023-08-22T16:00:40.287Z", - "size": 1069 - }, - { - "name": "package.json", - "mtime": "2023-08-22T16:00:40.288Z", - "size": 145 - }, - { - "name": "resources.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - }, - { - "name": "schema.graphql", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 466 - }, - { - "name": "utils", - "entries": [ - { - "name": "commonUtils.js", - "mtime": "2023-08-22T16:00:40.289Z", - "size": 583 - } - ] - } - ] - } - ] -} -``` - ---- - -## Get Component File - -Gets the contents of a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_component_file` -- `project` _(required)_ - the name of the project where the file is located -- `file` _(required)_ - the path relative to your project folder of the file you wish to view -- `encoding` _(optional)_ - the encoding that will be passed to the read file call. 
Defaults to `utf8` - -### Body - -```json -{ - "operation": "get_component_file", - "project": "my-component", - "file": "resources.js" -} -``` - -### Response: 200 - -```json -{ - "message": "/**export class MyCustomResource extends tables.TableName {\n\t/ we can define our own custom POST handler\n\tpost(content) {\n\t\t/ do something with the incoming content;\n\t\treturn super.post(content);\n\t}\n\t/ or custom GET handler\n\tget() {\n\t\t/ we can modify this resource before returning\n\t\treturn super.get();\n\t}\n}\n */\n/ we can also define a custom resource without a specific table\nexport class Greeting extends Resource {\n\t/ a \"Hello, world!\" handler\n\tget() {\n\t\treturn { greeting: 'Hello, world!' };\n\t}\n}" -} -``` - ---- - -## Set Component File - -Creates or updates a file inside a component project. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_component_file` -- `project` _(required)_ - the name of the project the file is located in -- `file` _(required)_ - the path relative to your project folder of the file you wish to set -- `payload` _(required)_ - what will be written to the file -- `encoding` _(optional)_ - the encoding that will be passed to the write file call. Defaults to `utf8` -- `replicated` _(optional)_ - if true, Harper will replicate the component update to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "set_component_file", - "project": "my-component", - "file": "test.js", - "payload": "console.log('hello world')" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully set component: test.js" -} -``` - ---- - -## Add SSH Key - -Adds an SSH key for deploying components from private repositories. This will also create an ssh config file that will be used when deploying the components. 
"package": "git+ssh://git@<host>:<repo-owner>/<repo-name>.git#semver:v1.2.3"
- ---- - -## Update SSH Key - -Updates the private key contents of an existing SSH key. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `update_ssh_key` -- `name` _(required)_ - the name of the key to be updated -- `key` _(required)_ - the private key contents. Must be an ed25519 key. Line breaks must be delimited with `\n` and have a trailing `\n` -- `host` _(required)_ - the host for the ssh config (see below). Used as part of the `package` url when deploying a component using this key. -- `hostname` _(required)_ - the hostname for the ssh config (see below). Used to map `host` to an actual domain (e.g. `github.com`) -- `known_hosts` _(optional)_ - the public SSH keys of the host your component will be retrieved from. If `hostname` is `github.com` this will be retrieved automatically. Line breaks must be delimited with `\n` -- `replicated` _(optional)_ - if true, HarperDB will replicate the key to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "update_ssh_key", - "name": "harperdb-private-component", - "key": "-----BEGIN OPENSSH PRIVATE KEY-----\nthis\nis\na\nNEWFAKE\nkey\n-----END OPENSSH PRIVATE KEY-----\n", - "host": "harperdb-private-component.github.com", - "hostname": "github.com" -} -``` - -### Response: 200 - -```json -{ - "message": "Updated ssh key: harperdb-private-component" -} -``` - -## Delete SSH Key - -Deletes a SSH key. This will also remove it from the generated SSH config. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_ssh_key` -- `name` _(required)_ - the name of the key to be deleted -- `replicated` _(optional)_ - if true, Harper will replicate the key deletion to all nodes in the cluster. Must be a boolean. 
```json
{
	"operation": "delete_ssh_key",
	"name": "harperdb-private-component"
}
```
- -### Body - -```json -{ - "operation": "set_ssh_known_hosts", - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Known hosts successfully set" -} -``` - -## Get SSH Known Hosts - -Gets the contents of the known_hosts file - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_ssh_known_hosts` - -### Body - -```json -{ - "operation": "get_ssh_known_hosts" -} -``` - -### Response: 200 - -```json -{ - "known_hosts": "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa 
- `projects` _(required)_ - must be an array of custom functions projects.
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_configuration` -- `logging_level` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file -- `clustering_enabled` _(optional)_ - one or more configuration keywords to be updated in the Harper configuration file - -### Body - -```json -{ - "operation": "set_configuration", - "logging_level": "trace", - "clustering_enabled": true -} -``` - -### Response: 200 - -```json -{ - "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." -} -``` - ---- - -## Get Configuration - -Returns the Harper configuration parameters. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_configuration` - -### Body - -```json -{ - "operation": "get_configuration" -} -``` - -### Response: 200 - -```json -{ - "http": { - "compressionThreshold": 1200, - "cors": false, - "corsAccessList": [null], - "keepAliveTimeout": 30000, - "port": 9926, - "securePort": null, - "timeout": 120000 - }, - "threads": 11, - "authentication": { - "cacheTTL": 30000, - "enableSessions": true, - "operationTokenTimeout": "1d", - "refreshTokenTimeout": "30d" - }, - "analytics": { - "aggregatePeriod": 60 - }, - "replication": { - "hostname": "node1", - "databases": "*", - "routes": null, - "url": "wss://127.0.0.1:9925" - }, - "componentsRoot": "/Users/hdb/components", - "localStudio": { - "enabled": false - }, - "logging": { - "auditAuthEvents": { - "logFailed": false, - "logSuccessful": false - }, - "auditLog": true, - "auditRetention": "3d", - "file": true, - "level": "error", - "root": "/Users/hdb/log", - "rotation": { - "enabled": false, - "compress": false, - "interval": null, - "maxSize": null, - "path": "/Users/hdb/log" - }, - "stdStreams": false - }, - "mqtt": { - "network": { - "port": 1883, - "securePort": 8883 - }, - "webSocket": true, - 
- `operation` _(required)_ - must always be `custom_functions_status`
Returns the content of the specified file as text. Harper Studio uses this call to render the file content in its built-in code editor.
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - ---- - -## Set Custom Function - -Updates the content of the specified file. Harper Studio uses this call to save any changes made through its built-in code editor. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_custom_function` -- `project` _(required)_ - the name of the project containing the file for which you wish to set content -- `type` _(required)_ - the name of the sub-folder containing the file for which you wish to set content - must be either routes or helpers -- `file` _(required)_ - the name of the file for which you wish to set content - should not include the file extension (which is always .js) -- `function_content` _(required)_ - the content you wish to save into the specified file - -### Body - -```json -{ - "operation": "set_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example", - "function_content": "'use strict';\n\nconst https = require('https');\n\nconst authRequest = (options) => {\n return new Promise((resolve, reject) => {\n const req = https.request(options, (res) => {\n res.setEncoding('utf8');\n let responseBody = '';\n\n res.on('data', (chunk) => {\n responseBody += chunk;\n });\n\n res.on('end', () => {\n resolve(JSON.parse(responseBody));\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.end();\n });\n};\n\nconst customValidation = async (request,logger) => {\n const options = {\n hostname: 'jsonplaceholder.typicode.com',\n port: 443,\n path: '/todos/1',\n method: 'GET',\n headers: { authorization: request.headers.authorization },\n };\n\n const result = await authRequest(options);\n\n /*\n * throw an authentication error based on the response body or 
statusCode\n */\n if (result.error) {\n const errorString = result.error || 'Sorry, there was an error authenticating your request';\n logger.error(errorString);\n throw new Error(errorString);\n }\n return request;\n};\n\nmodule.exports = customValidation;\n" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully updated custom function: example.js" -} -``` - ---- - -## Drop Custom Function - -Deletes the specified file. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_custom_function` -- `project` _(required)_ - the name of the project containing the file you wish to delete -- `type` _(required)_ - the name of the sub-folder containing the file you wish to delete. Must be either routes or helpers -- `file` _(required)_ - the name of the file you wish to delete. Should not include the file extension (which is always .js) - -### Body - -```json -{ - "operation": "drop_custom_function", - "project": "dogs", - "type": "helpers", - "file": "example" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully deleted custom function: example.js" -} -``` - ---- - -## Add Custom Function Project - -Creates a new project folder in the Custom Functions root project directory. It also inserts into the new directory the contents of our Custom Functions Project template, which is available publicly, here: [https://github.com/HarperDB/harperdb-custom-functions-template](https://github.com/HarperDB/harperdb-custom-functions-template). 
Creates a `.tar` file of the specified project folder, then reads it into a base64-encoded string and returns an object containing the project name, the base64-encoded payload, and the path to the temporary `.tar` file.
"payload": "A very large base64-encoded string representation of the .tar file"
If you need the exact count, you can include an `"exact_count": true` in the operation, but be aware that this requires a full table scan (may be expensive). - -- `operation` _(required)_ - must always be `describe_all` - -### Body - -```json -{ - "operation": "describe_all" -} -``` - -### Response: 200 - -```json -{ - "dev": { - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } - } -} -``` - ---- - -## Describe database - -Returns the definitions of all tables within the specified database. - -- `operation` _(required)_ - must always be `describe_database` -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "dog": { - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 - } -} -``` - ---- - -## Describe Table - -Returns the definition of the specified table. 
- -- `operation` _(required)_ - must always be `describe_table` -- `table` _(required)_ - table you wish to describe -- `database` _(optional)_ - database where the table you wish to describe lives. The default is `data` - -### Body - -```json -{ - "operation": "describe_table", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "schema": "dev", - "name": "dog", - "hash_attribute": "id", - "audit": true, - "schema_defined": false, - "attributes": [ - { - "attribute": "id", - "indexed": true, - "is_primary_key": true - }, - { - "attribute": "__createdtime__", - "indexed": true - }, - { - "attribute": "__updatedtime__", - "indexed": true - }, - { - "attribute": "type", - "indexed": true - } - ], - "clustering_stream_name": "dd9e90c2689151ab812e0f2d98816bff", - "record_count": 4000, - "estimated_record_range": [3976, 4033], - "last_updated_record": 1697658683698.4504 -} -``` - ---- - -## Create database - -Create a new database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_database` -- `database` _(optional)_ - name of the database you are creating. The default is `data` - -### Body - -```json -{ - "operation": "create_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "database 'dev' successfully created" -} -``` - ---- - -## Drop database - -Drop an existing database. NOTE: Dropping a database will delete all tables and all of their records in that database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_database` -- `database` _(required)_ - name of the database you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. 
- -### Body - -```json -{ - "operation": "drop_database", - "database": "dev" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted 'dev'" -} -``` - ---- - -## Create Table - -Create a new table within a database. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `create_table` -- `database` _(optional)_ - name of the database where you want your table to live. If the database does not exist, it will be created. If the `database` property is not provided it will default to `data`. -- `table` _(required)_ - name of the table you are creating -- `primary_key` _(required)_ - primary key for the table -- `attributes` _(optional)_ - an array of attributes that specifies the schema for the table, that is the set of attributes for the table. When attributes are supplied the table will not be considered a "dynamic schema" table, and attributes will not be auto-added when records with new properties are inserted. Each attribute is specified as: - - `name` _(required)_ - the name of the attribute - - `indexed` _(optional)_ - indicates if the attribute should be indexed - - `type` _(optional)_ - specifies the data type of the attribute (can be String, Int, Float, Date, ID, Any) -- `expiration` _(optional)_ - specifies the time-to-live or expiration of records in the table before they are evicted (records are not evicted on any timer if not specified). This is specified in seconds. - -### Body - -```json -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'dev.dog' successfully created." -} -``` - ---- - -## Drop Table - -Drop an existing database table. NOTE: Dropping a table will delete all associated records in that table. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_table` -- `database` _(optional)_ - database where the table you are dropping lives. The default is `data` -- `table` _(required)_ - name of the table you are dropping -- `replicated` _(optional)_ - if true, Harper will replicate the component to all nodes in the cluster. Must be a boolean. - -### Body - -```json -{ - "operation": "drop_table", - "database": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted table 'dev.dog'" -} -``` - ---- - -## Create Attribute - -Create a new attribute within the specified table. **The create_attribute operation can be used for admins wishing to pre-define database values for setting role-based permissions or for any other reason.** - -_Note: Harper will automatically create new attributes on insert and update if they do not already exist within the database._ - -- `operation` _(required)_ - must always be `create_attribute` -- `database` _(optional)_ - name of the database of the table you want to add your attribute. The default is `data` -- `table` _(required)_ - name of the table where you want to add your attribute to live -- `attribute` _(required)_ - name for the attribute - -### Body - -```json -{ - "operation": "create_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "skipped_hashes": [], - "inserted_hashes": ["383c0bef-5781-4e1c-b5c8-987459ad0831"] -} -``` - ---- - -## Drop Attribute - -Drop an existing attribute from the specified table. NOTE: Dropping an attribute will delete all associated attribute values in that table. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `drop_attribute` -- `database` _(optional)_ - database where the table you are dropping lives. 
The default is `data` -- `table` _(required)_ - table where the attribute you are dropping lives -- `attribute` _(required)_ - attribute that you intend to drop - -### Body - -```json -{ - "operation": "drop_attribute", - "database": "dev", - "table": "dog", - "attribute": "is_adorable" -} -``` - -### Response: 200 - -```json -{ - "message": "successfully deleted attribute 'is_adorable'" -} -``` - ---- - -## Get Backup - -This will return a snapshot of the requested database. This provides a means for backing up the database through the operations API. The response will be the raw database file (in binary format), which can later be restored as a database file by copying into the appropriate hdb/databases directory (with Harper not running). The returned file is a snapshot of the database at the moment in time that the get_backup operation begins. This also supports backing up individual tables in a database. However, this is a more expensive operation than backing up a database in whole, and will lose any transactional atomicity between writes across tables, so generally it is recommended that you backup the entire database. - -It is important to note that trying to copy a database file that is in use (Harper actively running and writing to the file) using standard file copying tools is not safe (the copied file will likely be corrupt), which is why using this snapshot operation is recommended for backups (volume snapshots are also a good way to backup Harper databases). 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this should always be `get_backup` -- `database` _(required)_ - this is the database that will be snapshotted and returned -- `table` _(optional)_ - this will specify a specific table to backup -- `tables` _(optional)_ - this will specify a specific set of tables to backup - -### Body - -```json -{ - "operation": "get_backup", - "database": "dev" -} -``` - -### Response: 200 - -``` -The database in raw binary data format -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/index.md b/versioned_docs/version-4.7/developers/operations-api/index.md deleted file mode 100644 index ad44d9de..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/index.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Operations API ---- - -# Operations API - -The operations API provides a full set of capabilities for configuring, deploying, administering, and controlling Harper. To send operations to the operations API, you send a POST request to the operations API endpoint, which [defaults to port 9925](../deployments/configuration#operationsapi), on the root path, where the body is the operations object. These requests need to authenticated, which can be done with [basic auth](./security#basic-auth) or [JWT authentication](./security#jwt-auth). 
For example, a request to create a table would be performed as: - -```http -POST https://my-harperdb-server:9925/ -Authorization: Basic YourBase64EncodedInstanceUser:Pass -Content-Type: application/json - -{ - "operation": "create_table", - "table": "my-table" -} -``` - -The operations API reference is available below and categorized by topic: - -- [Quick Start Examples](operations-api/quickstart-examples) -- [Databases and Tables](operations-api/databases-and-tables) -- [NoSQL Operations](operations-api/nosql-operations) -- [Bulk Operations](operations-api/bulk-operations) -- [Users and Roles](operations-api/users-and-roles) -- [Clustering](operations-api/clustering) -- [Clustering with NATS](operations-api/clustering-nats) -- [Components](operations-api/components) -- [Registration](operations-api/registration) -- [Jobs](operations-api/jobs) -- [Logs](operations-api/logs) -- [System Operations](operations-api/system-operations) -- [Configuration](operations-api/configuration) -- [Certificate Management](operations-api/certificate-management) -- [Token Authentication](operations-api/token-authentication) -- [SQL Operations](operations-api/sql-operations) -- [Advanced JSON SQL Examples](operations-api/advanced-json-sql-examples) -- [Analytics](operations-api/analytics) - -• [Past Release API Documentation](https://olddocs.harperdb.io) - -## More Examples - -Here is an example of using `curl` to make an operations API request: - -```bash -curl --location --request POST 'https://instance-subdomain.harperdbcloud.com' \ ---header 'Authorization: Basic YourBase64EncodedInstanceUser:Pass' \ ---header 'Content-Type: application/json' \ ---data-raw '{ -"operation": "create_schema", -"schema": "dev" -}' -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/jobs.md b/versioned_docs/version-4.7/developers/operations-api/jobs.md deleted file mode 100644 index cf71fa00..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/jobs.md +++ /dev/null @@ 
-1,87 +0,0 @@ ---- -title: Jobs ---- - -# Jobs - -## Get Job - -Returns job status, metrics, and messages for the specified job ID. - -- `operation` _(required)_ - must always be `get_job` -- `id` _(required)_ - the id of the job you wish to view - -### Body - -```json -{ - "operation": "get_job", - "id": "4a982782-929a-4507-8794-26dae1132def" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615798782, - "__updatedtime__": 1611615801207, - "created_datetime": 1611615798774, - "end_datetime": 1611615801206, - "id": "4a982782-929a-4507-8794-26dae1132def", - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "start_datetime": 1611615798805, - "status": "COMPLETE", - "type": "csv_url_load", - "user": "HDB_ADMIN", - "start_datetime_converted": "2021-01-25T23:03:18.805Z", - "end_datetime_converted": "2021-01-25T23:03:21.206Z" - } -] -``` - ---- - -## Search Jobs By Start Date - -Returns a list of job statuses, metrics, and messages for all jobs executed within the specified time window. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `search_jobs_by_start_date` -- `from_date` _(required)_ - the date you wish to start the search -- `to_date` _(required)_ - the date you wish to end the search - -### Body - -```json -{ - "operation": "search_jobs_by_start_date", - "from_date": "2021-01-25T22:05:27.464+0000", - "to_date": "2021-01-25T23:05:27.464+0000" -} -``` - -### Response: 200 - -```json -[ - { - "id": "942dd5cb-2368-48a5-8a10-8770ff7eb1f1", - "user": "HDB_ADMIN", - "type": "csv_url_load", - "status": "COMPLETE", - "start_datetime": 1611613284781, - "end_datetime": 1611613287204, - "job_body": null, - "message": "successfully loaded 350 of 350 records", - "created_datetime": 1611613284764, - "__createdtime__": 1611613284767, - "__updatedtime__": 1611613287207, - "start_datetime_converted": "2021-01-25T22:21:24.781Z", - "end_datetime_converted": "2021-01-25T22:21:27.204Z" - } -] -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/logs.md b/versioned_docs/version-4.7/developers/operations-api/logs.md deleted file mode 100644 index 2c2ba194..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/logs.md +++ /dev/null @@ -1,733 +0,0 @@ ---- -title: Logs ---- - -# Logs - -## Read Harper Log - -Returns log outputs from the primary Harper log based on the provided search criteria. [Read more about Harper logging here](../../administration/logging/standard-logging#read-logs-via-the-api). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_Log` -- `start` _(optional)_ - result to start with. Default is 0, the first log in `hdb.log`. Must be a number -- `limit` _(optional)_ - number of results returned. Default behavior is 1000. Must be a number -- `level` _(optional)_ - error level to filter on. Default behavior is all levels. 
Must be `notify`, `error`, `warn`, `info`, `debug` or `trace` -- `from` _(optional)_ - date to begin showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is first log in `hdb.log` -- `until` _(optional)_ - date to end showing log results. Must be `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default is last log in `hdb.log` -- `order` _(optional)_ - order to display logs desc or asc by timestamp. By default, will maintain `hdb.log` order -- `filter` _(optional)_ - a query string that must be a substring of each log line returned - -### Body - -```json -{ - "operation": "read_log", - "start": 0, - "limit": 1000, - "level": "error", - "from": "2021-01-25T22:05:27.464+0000", - "until": "2021-01-25T23:05:27.464+0000", - "order": "desc" -} -``` - -### Response: 200 - -```json -[ - { - "level": "notify", - "message": "Connected to cluster server.", - "timestamp": "2021-01-25T23:03:20.710Z", - "thread": "main/0", - "tags": [] - }, - { - "level": "warn", - "message": "Login failed", - "timestamp": "2021-01-25T22:24:45.113Z", - "thread": "http/9", - "tags": [] - }, - { - "level": "error", - "message": "unknown attribute 'name and breed'", - "timestamp": "2021-01-25T22:23:24.167Z", - "thread": "http/9", - "tags": [] - } -] -``` - ---- - -## Read Transaction Log - -Returns all transactions logged for the specified database table. You may filter your results with the optional from, to, and limit fields. [Read more about Harper transaction logs here](logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_transaction_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `from` _(optional)_ - time format must be millisecond-based epoch in UTC -- `to` _(optional)_ - time format must be millisecond-based epoch in UTC -- `limit` _(optional)_ - max number of logs you want to receive. 
Must be a number - -### Body - -```json -{ - "operation": "read_transaction_log", - "schema": "dev", - "table": "dog", - "from": 1560249020865, - "to": 1660585656639, - "limit": 10 -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619736, - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38, - "__updatedtime__": 1660165619688, - "__createdtime__": 1660165619688 - } - ] - }, - { - "operation": "insert", - "user": "admin", - "timestamp": 1660165619813, - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": 
true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 10, - "dog_name": "Monkey", - "owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true, - "__updatedtime__": 1660165619797, - "__createdtime__": 1660165619797 - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true, - "__updatedtime__": 1660165619798, - "__createdtime__": 1660165619798 - } - ] - }, - { - "operation": "update", - "user": "admin", - "timestamp": 1660165620040, - "records": [ - { - "id": 1, - "dog_name": "Penny B", - "__updatedtime__": 1660165620036 - } - ] - } -] -``` - ---- - -## Delete Transaction Logs Before - -Deletes transaction log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_transaction_log_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. 
Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_transaction_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1598290282817 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 26a6d3a6-6d77-40f9-bee7-8d6ef479a126" -} -``` - ---- - -## Read Audit Log - -AuditLog must be enabled in the Harper configuration file to make this request. Returns a verbose history of all transactions logged for the specified database table, including original data records. You may filter your results with the optional search_type and search_values fields. [Read more about Harper transaction logs here.](../../administration/logging/transaction-logging#read_transaction_log) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - possibilities are `hash_value`, `timestamp` and `username` -- `search_values` _(optional)_ - an array of string or numbers relating to search_type - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog" -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - 
"__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - } -] -``` - ---- - -## Read Audit Log by timestamp - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table between the specified time window. [Read more about Harper transaction logs here](logs#read-transaction-log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - timestamp -- `search_values` _(optional)_ - an array containing a maximum of two values \[`from_timestamp`, `to_timestamp`] defining the range of transactions you would like to view. 
- - Timestamp format is millisecond-based epoch in UTC - - If no items are supplied then all transactions are returned - - If only one entry is supplied then all transactions after the supplied timestamp will be returned - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "timestamp", - "search_values": [1660585740558, 1660585759710.56] -} -``` - -### Response: 200 - -```json -[ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } -] -``` - ---- - -## Read Audit Log by 
username - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed by the specified user. [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - username -- `search_values` _(optional)_ - the Harper user for whom you would like to view transactions - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "username", - "search_values": ["admin"] -} -``` - -### Response: 200 - -```json -{ - "admin": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "hash_values": [318], - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585716133.01, - "hash_values": [444], - "records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660585740558.415, - "hash_values": [444], - "records": [ - { - "id": 444, - "fur_type": "coarse", - "__updatedtime__": 1660585740556 - } - ], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 1660585716128, - "__createdtime__": 1660585716128 - } - ] - }, - { - "operation": "delete", - "user_name": "admin", - "timestamp": 1660585759710.56, - "hash_values": [444], - "original_records": [ - { - "id": 444, - "dog_name": "Davis", - "__updatedtime__": 
1660585740556, - "__createdtime__": 1660585716128, - "fur_type": "coarse" - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "hash_values": [318], - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - } - ] -} -``` - ---- - -## Read Audit Log by hash_value - -AuditLog must be enabled in the Harper configuration file to make this request. Returns the transactions logged for the specified database table which were committed to the specified hash value(s). [Read more about Harper transaction logs here](../../administration/logging/transaction-logging#read_transaction_log). - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `read_audit_log` -- `schema` _(required)_ - schema under which the transaction log resides -- `table` _(required)_ - table under which the transaction log resides -- `search_type` _(optional)_ - hash_value -- `search_values` _(optional)_ - an array of hash_attributes for which you wish to see transaction logs - -### Body - -```json -{ - "operation": "read_audit_log", - "schema": "dev", - "table": "dog", - "search_type": "hash_value", - "search_values": [318] -} -``` - -### Response: 200 - -```json -{ - "318": [ - { - "operation": "insert", - "user_name": "admin", - "timestamp": 1660585635882.288, - "records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 1660585635876 - } - ] - }, - { - "operation": "update", - "user_name": "admin", - "timestamp": 1660586298457.224, - "records": [ - { - "id": 318, - "fur_type": "super fluffy", - "__updatedtime__": 1660586298455 - } - ], - "original_records": [ - { - "id": 318, - "dog_name": "Polliwog", - "__updatedtime__": 1660585635876, - "__createdtime__": 
1660585635876 - } - ] - } - ] -} -``` - ---- - -## Delete Audit Logs Before - -AuditLog must be enabled in the Harper configuration file to make this request. Deletes audit log data for the specified database table that is older than the specified timestamp. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `delete_audit_logs_before` -- `schema` _(required)_ - schema under which the transaction log resides. Must be a string -- `table` _(required)_ - table under which the transaction log resides. Must be a string -- `timestamp` _(required)_ - records older than this date will be deleted. Format is millisecond-based epoch in UTC - -### Body - -```json -{ - "operation": "delete_audit_logs_before", - "schema": "dev", - "table": "dog", - "timestamp": 1660585759710.56 -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id 7479e5f8-a86e-4fc9-add7-749493bc100f" -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md b/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md deleted file mode 100644 index db07e0da..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/nosql-operations.md +++ /dev/null @@ -1,389 +0,0 @@ ---- -title: NoSQL Operations ---- - -# NoSQL Operations - -## Insert - -Adds one or more rows of data to a database table. Primary keys of the inserted JSON record may be supplied on insert. If a primary key is not provided, then a GUID or incremented number (depending on type) will be generated for each record. - -- `operation` _(required)_ - must always be `insert` -- `database` _(optional)_ - database where the table you are inserting records into lives. 
The default is `data` -- `table` _(required)_ - table where you want to insert records -- `records` _(required)_ - array of one or more records for insert - -### Body - -```json -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "dog_name": "Harper", - "breed_id": 346, - "age": 7 - }, - { - "id": 9, - "dog_name": "Penny", - "breed_id": 154, - "age": 7 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 2 of 2 records", - "inserted_hashes": [8, 9], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Changes the values of specified attributes in one or more rows in a database table as identified by the primary key. NOTE: Primary key of the updated JSON record(s) MUST be supplied on update. - -- `operation` _(required)_ - must always be `update` -- `database` _(optional)_ - database of the table you are updating records in. The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 1, - "weight_lbs": 55 - }, - { - "id": 2, - "owner": "Kyle B", - "weight_lbs": 35 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 2 of 2 records", - "update_hashes": [1, 3], - "skipped_hashes": [] -} -``` - ---- - -## Upsert - -Changes the values of specified attributes for rows with matching primary keys that exist in the table. Adds rows to the database table for primary keys that do not exist or are not provided. - -- `operation` _(required)_ - must always be `upsert` -- `database` _(optional)_ - database of the table you are updating records in. 
The default is `data` -- `table` _(required)_ - table where you want to update records -- `records` _(required)_ - array of one or more records for update - -### Body - -```json -{ - "operation": "upsert", - "database": "dev", - "table": "dog", - "records": [ - { - "id": 8, - "weight_lbs": 155 - }, - { - "name": "Bill", - "breed": "Pit Bull", - "id": 10, - "Age": 11, - "weight_lbs": 155 - }, - { - "name": "Harper", - "breed": "Mutt", - "age": 5, - "weight_lbs": 155 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "upserted 3 of 3 records", - "upserted_hashes": [8, 10, "ea06fc8e-717b-4c6c-b69d-b29014054ab7"] -} -``` - ---- - -## Delete - -Removes one or more rows of data from a specified table. - -- `operation` _(required)_ - must always be `delete` -- `database` _(optional)_ - database where the table you are deleting records lives. The default is `data` -- `table` _(required)_ - table where you want to deleting records -- `ids` _(required)_ - array of one or more primary key values, which identifies records to delete - -### Body - -```json -{ - "operation": "delete", - "database": "dev", - "table": "dog", - "ids": [1, 2] -} -``` - -### Response: 200 - -```json -{ - "message": "2 of 2 records successfully deleted", - "deleted_hashes": [1, 2], - "skipped_hashes": [] -} -``` - ---- - -## Search By ID - -Returns data from a table for one or more primary keys. - -- `operation` _(required)_ - must always be `search_by_id` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `ids` _(required)_ - array of primary keys to retrieve -- `get_attributes` _(required)_ - define which attributes you want returned. 
Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_id", - "database": "dev", - "table": "dog", - "ids": [1, 2], - "get_attributes": ["dog_name", "breed_id"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny", - "breed_id": 154 - }, - { - "dog_name": "Harper", - "breed_id": 346 - } -] -``` - ---- - -## Search By Value - -Returns data from a table for a matching value. - -- `operation` _(required)_ - must always be `search_by_value` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `attribute` _(required)_ - attribute you wish to search can be any attribute -- `search_attribute` - deprecated in favor of `attribute` -- `value` _(required)_ - value you wish to search - wild cards are allowed -- `search_value` - deprecated in favor of `value` -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes - -### Body - -```json -{ - "operation": "search_by_value", - "database": "dev", - "table": "dog", - "attribute": "owner_name", - "value": "Ky*", - "get_attributes": ["id", "dog_name"] -} -``` - -### Response: 200 - -```json -[ - { - "dog_name": "Penny" - }, - { - "dog_name": "Kato" - } -] -``` - ---- - -## Search By Conditions - -Returns data from a table for one or more matching conditions. This supports grouping of conditions to indicate order of operations as well. - -- `operation` _(required)_ - must always be `search_by_conditions` -- `database` _(optional)_ - database where the table you are searching lives. The default is `data` -- `table` _(required)_ - table you wish to search -- `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` -- `offset` _(optional)_ - the number of records that the query results will skip. 
The default is `0` -- `limit` _(optional)_ - the number of records that the query results will include. The default is `null`, resulting in no limit -- `sort` _optional_ - This is an object that indicates the sort order. It has the following properties: - - `attribute` _(required)_ - The attribute to sort by - - `descending` _(optional)_ - If true, will sort in descending order (defaults to ascending order) - - `next` _(optional)_ - This can define the next sort object that will be used to break ties for sorting when there are multiple records with the same value for the first attribute (follows the same structure as `sort`). -- `get_attributes` _(required)_ - define which attributes you want returned. Use `['*']` to return all attributes -- `conditions` _(required)_ - the array of conditions objects, specified below, to filter by. Must include one or more object in the array that are a condition or a grouped set of conditions. A condition has the following properties: - - `attribute` _(required)_ - the attribute you wish to search, can be any attribute - - `search_attribute` - deprecated in favor of `attribute` - - `comparator` _(required)_ - the type of search to perform - `equals`, `not_equal`, `contains`, `starts_with`, `ends_with`, `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `between` - - `search_type` - deprecated in favor of `comparator` - - `value` _(required)_ - case-sensitive value you wish to search. If the `comparator` is `between` then use an array of two values to search between (both inclusive) - - `search_value` - deprecated in favor of `value` - Or a set of grouped conditions has the following properties: - - `operator` _(optional)_ - the operator used between each condition - `and`, `or`. The default is `and` - - `conditions` _(required)_ - the array of conditions objects as described above. 
- -### Body - -```json -{ - "operation": "search_by_conditions", - "database": "dev", - "table": "dog", - "operator": "and", - "offset": 0, - "limit": 10, - "sort": { - "attribute": "id", - "next": { - "attribute": "age", - "descending": true - } - }, - "get_attributes": ["*"], - "conditions": [ - { - "attribute": "age", - "comparator": "between", - "value": [5, 8] - }, - { - "attribute": "weight_lbs", - "comparator": "greater_than", - "value": 40 - }, - { - "operator": "or", - "conditions": [ - { - "attribute": "adorable", - "comparator": "equals", - "value": true - }, - { - "attribute": "lovable", - "comparator": "equals", - "value": true - } - ] - } - ] -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1620227719791, - "__updatedtime__": 1620227719791, - "adorable": true, - "age": 7, - "breed_id": 346, - "dog_name": "Harper", - "id": 2, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 7, - "breed_id": 348, - "dog_name": "Alby", - "id": 3, - "owner_name": "Kaylan", - "weight_lbs": 84 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 6, - "breed_id": 347, - "dog_name": "Billy", - "id": 4, - "owner_name": "Zach", - "weight_lbs": 60 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 5, - "breed_id": 250, - "dog_name": "Gemma", - "id": 8, - "owner_name": "Stephen", - "weight_lbs": 55 - }, - { - "__createdtime__": 1620227719792, - "__updatedtime__": 1620227719792, - "adorable": true, - "age": 8, - "breed_id": 104, - "dog_name": "Bode", - "id": 11, - "owner_name": "Margo", - "weight_lbs": 75 - } -] -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md b/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md deleted file mode 100644 index a6c8f637..00000000 --- 
a/versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: Quick Start Examples ---- - -# Quick Start Examples - -Harper recommends utilizing [Harper Applications](../../developers/applications/) for defining databases, tables, and other functionality. However, this guide is a great way to get started using on the Harper Operations API. - -## Create dog Table - -We first need to create a table. Since our company is named after our CEO's dog, lets create a table to store all our employees' dogs. We'll call this table, `dogs`. - -Tables in Harper are schema-less, so we don't need to add any attributes other than a primary_key (in pre 4.2 versions this was referred to as the hash_attribute) to create this table. - -Harper does offer a `database` parameter that can be used to hold logical groupings of tables. The parameter is optional and if not provided the operation will default to using a database named `data`. - -If you receive an error response, make sure your Basic Authentication user and password match those you entered during the installation process. - -### Body - -```json -{ - "operation": "create_table", - "table": "dog", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.dog' successfully created." -} -``` - ---- - -## Create breed Table - -Now that we have a table to store our dog data, we also want to create a table to track known breeds. Just as with the dog table, the only attribute we need to specify is the `primary_key`. - -### Body - -```json -{ - "operation": "create_table", - "table": "breed", - "primary_key": "id" -} -``` - -### Response: 200 - -```json -{ - "message": "table 'data.breed' successfully created." -} -``` - ---- - -## Insert 1 Dog - -We're ready to add some dog data. Penny is our CTO's pup, so she gets ID 1 or we're all fired. 
We are specifying attributes in this call, but this doesn't prevent us from specifying additional attributes in subsequent calls. - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny", - "owner_name": "Kyle", - "breed_id": 154, - "age": 7, - "weight_lbs": 38 - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Insert Multiple Dogs - -Let's add some more Harper doggies! We can add as many dog objects as we want into the records collection. If you're adding a lot of objects, we would recommend using the .csv upload option (see the next section where we populate the breed table). - -### Body - -```json -{ - "operation": "insert", - "table": "dog", - "records": [ - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "breed_id": 346, - "age": 7, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 3, - "dog_name": "Alby", - "owner_name": "Kaylan", - "breed_id": 348, - "age": 7, - "weight_lbs": 84, - "adorable": true - }, - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "breed_id": 347, - "age": 6, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "breed_id": 348, - "age": 8, - "weight_lbs": 15, - "adorable": true - }, - { - "id": 6, - "dog_name": "Kato", - "owner_name": "Kyle", - "breed_id": 351, - "age": 6, - "weight_lbs": 32, - "adorable": true - }, - { - "id": 7, - "dog_name": "Simon", - "owner_name": "Fred", - "breed_id": 349, - "age": 3, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "breed_id": 350, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 9, - "dog_name": "Yeti", - "owner_name": "Jaxon", - "breed_id": 200, - "age": 5, - "weight_lbs": 55, - "adorable": true - }, - { - "id": 10, - "dog_name": "Monkey", - 
"owner_name": "Aron", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - }, - { - "id": 11, - "dog_name": "Bode", - "owner_name": "Margo", - "breed_id": 104, - "age": 8, - "weight_lbs": 75, - "adorable": true - }, - { - "id": 12, - "dog_name": "Tucker", - "owner_name": "David", - "breed_id": 346, - "age": 2, - "weight_lbs": 60, - "adorable": true - }, - { - "id": 13, - "dog_name": "Jagger", - "owner_name": "Margo", - "breed_id": 271, - "age": 7, - "weight_lbs": 35, - "adorable": true - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 12 of 12 records", - "inserted_hashes": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - "skipped_hashes": [] -} -``` - ---- - -## Bulk Insert Breeds Via CSV - -We need to populate the 'breed' table with some data so we can reference it later. For larger data sets, we recommend using our CSV upload option. - -Each header in a column will be considered as an attribute, and each row in the file will be a row in the table. Simply specify the file path and the table to upload to, and Harper will take care of the rest. You can pull the breeds.csv file from here: [https://s3.amazonaws.com/complimentarydata/breeds.csv](https://s3.amazonaws.com/complimentarydata/breeds.csv) - -### Body - -```json -{ - "operation": "csv_url_load", - "table": "breed", - "csv_url": "https://s3.amazonaws.com/complimentarydata/breeds.csv" -} -``` - -### Response: 200 - -```json -{ - "message": "Starting job with id e77d63b9-70d5-499c-960f-6736718a4369", - "job_id": "e77d63b9-70d5-499c-960f-6736718a4369" -} -``` - ---- - -## Update 1 Dog Using NoSQL - -Harper supports NoSQL and SQL commands. We're going to update the dog table to show Penny's last initial using our NoSQL API. 
- -### Body - -```json -{ - "operation": "update", - "table": "dog", - "records": [ - { - "id": 1, - "dog_name": "Penny B" - } - ] -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Select a Dog by ID Using SQL - -Now we're going to use a simple SQL SELECT call to pull Penny's updated data. Note we now see Penny's last initial in the dog name. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM data.dog where id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "owner_name": "Kyle", - "adorable": null, - "breed_id": 154, - "__updatedtime__": 1610749428575, - "dog_name": "Penny B", - "weight_lbs": 38, - "id": 1, - "age": 7, - "__createdtime__": 1610749386566 - } -] -``` - ---- - -## Select Dogs and Join Breed - -Here's a more complex SQL command joining the breed table with the dog table. We will also pull only the pups belonging to Kyle, Zach, and Stephen. - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT d.id, d.dog_name, d.owner_name, b.name, b.section FROM data.dog AS d INNER JOIN data.breed AS b ON d.breed_id = b.id WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') AND b.section = 'Mutt' ORDER BY d.dog_name" -} -``` - -### Response: 200 - -```json -[ - { - "id": 4, - "dog_name": "Billy", - "owner_name": "Zach", - "name": "LABRADOR / GREAT DANE MIX", - "section": "Mutt" - }, - { - "id": 8, - "dog_name": "Gemma", - "owner_name": "Stephen", - "name": "SHORT HAIRED SETTER MIX", - "section": "Mutt" - }, - { - "id": 2, - "dog_name": "Harper", - "owner_name": "Stephen", - "name": "HUSKY MIX", - "section": "Mutt" - }, - { - "id": 5, - "dog_name": "Rose Merry", - "owner_name": "Zach", - "name": "TERRIER MIX", - "section": "Mutt" - } -] -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/registration.md b/versioned_docs/version-4.7/developers/operations-api/registration.md deleted file mode 100644 index 
d5d278c5..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/registration.md +++ /dev/null @@ -1,231 +0,0 @@ ---- -title: Registration ---- - -# Registration - -## Registration Info - -Returns the registration data of the Harper instance. - -- `operation` _(required)_ - must always be `registration_info` - -### Body - -```json -{ - "operation": "registration_info" -} -``` - -### Response: 200 - -```json -{ - "registered": true, - "version": "4.2.0", - "ram_allocation": 2048, - "license_expiration_date": "2022-01-15" -} -``` - ---- - -## Install Usage License - -Install a Harper license for a block of usage. Multiple usage blocks may be installed, and they will be used up sequentially, with the earliest installed blocks used first. A license is installed -by creating a string that consists of three base64url encoded blocks, separated by dots. The three blocks consist of: - -- `header`: This is a JSON object with two properties: - - `typ`: should be "Harper-License" - - `alg`: should be "EdDSA" - -This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the first base64url block. 
- -- license payload: This is a JSON object with properties: - - `id` _(required)_ - A unique id for the license - - `level` _(required)_ - Usage level number - - `region` _(required)_ - The region id where this license can be used - - `reads` _(required)_ - The number of allowed reads - - `readBytes` _(required)_ - The number of allowed read bytes - - `writes` _(required)_ - The number of allowed writes - - `writeBytes` _(required)_ - The number of allowed write bytes - - `realTimeMessages` _(required)_ - The number of allowed real-time messages - - `realTimeBytes` _(required)_ - The number of allowed real-time message bytes - - `cpuTime` _(optional)_ - The allowed amount of CPU time consumed by application code - - `storage` _(optional)_ - Maximum of storage that may be used - - `expiration` _(required)_ - The date when this block expires, as an ISO date - -This JSON object should be converted to base64url (conversion from utf-8 to base64url) and is the second base64url block. - -For example: - -```json -{ - "id": "license-717b-4c6c-b69d-b29014054ab7", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "readBytes": 8000000000000, - "writes": 500000000, - "writeBytes": 1000000000000, - "realTimeMessages": 10000000000, - "realTimeBytes": 40000000000000, - "cpuTime": 108000, - "storage": 400000000000000, - "expiration": "2025-07-25T21:17:21.248Z" -} -``` - -- `signature`: This is the cryptographic signature, signed by Harper, of the first two blocks, separated by a dot, `header.payload`. This is also converted to base64url. - -The three base64url blocks are combined to form the `license` property value in the operation. 
- -- `operation` _(required)_ - must always be `install_usage_license` -- `license` _(required)_ - This is the combination of the three blocks in the form `header.payload.signature` - -### Body - -```json -{ - "operation": "install_usage_license", - "license": "abc...0123.abc...0123.abc...0123" -} -``` - -### Response: 200 - -```json -{ - "message": "Successfully installed usage license" -} -``` - ---- - -## Get Usage Licenses - -This will retrieve and return _all_ usage licenses (including expired, exhausted, and licenses in any other state), with counts of how much of the limits have been consumed. - -- `operation` _(required)_ - must always be `get_usage_licenses` -- `region` _(optional)_ - will filter by region when supplied - -### Body - -```json -{ - "operation": "get_usage_licenses" -} -``` - -### Response: 200 - -```json -[ - { - "id": "license-717b-4c6c-b69d-b29014054ab7", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "usedReads": 1100000000, - "readBytes": 8000000000000, - "usedReadBytes": 3000000000000, - "writes": 500000000, - "usedWrites": 300000000, - "writeBytes": 1000000000000, - "usedWriteBytes": 4300000000000, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 2000000000, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 13000000000000, - "cpuTime": 108000, - "usedCpuTime": 41000, - "storage": 400000000000000, - "expiration": "2025-07-25T21:17:21.248Z" - }, - { - "id": "license-4c6c-b69d-b29014054ab7-717b", - "level": 2, - "region": "us-nw-2", - "reads": 2000000000, - "usedReads": 0, - "readBytes": 8000000000000, - "usedReadBytes": 0, - "writes": 500000000, - "usedWrites": 0, - "writeBytes": 1000000000000, - "usedWriteBytes": 0, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 0, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 0, - "cpuTime": 108000, - "usedCpuTime": 0, - "storage": 400000000000000, - "expiration": "2025-09-25T21:17:21.248Z" - }, - { - "id": 
"license-4c6c-b69d-b29014054ab7-717b", - "level": 2, - "region": "us-se-2", - "reads": 2000000000, - "usedReads": 0, - "readBytes": 8000000000000, - "usedReadBytes": 0, - "writes": 500000000, - "usedWrites": 0, - "writeBytes": 1000000000000, - "usedWriteBytes": 0, - "realTimeMessages": 10000000000, - "usedRealTimeMessages": 0, - "realTimeBytes": 40000000000000, - "usedRealTimeBytes": 0, - "cpuTime": 108000, - "usedCpuTime": 0, - "storage": 400000000000000, - "expiration": "2025-11-25T21:17:21.248Z" - } -] -``` - ---- - -## Get Fingerprint - -(Deprecated) -Returns the Harper fingerprint, uniquely generated based on the machine, for licensing purposes. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_fingerprint` - -### Body - -```json -{ - "operation": "get_fingerprint" -} -``` - ---- - -## Set License - -(Deprecated) -Sets the Harper license as generated by Harper License Management software. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_license` -- `key` _(required)_ - your license key -- `company` _(required)_ - the company that was used in the license - -### Body - -```json -{ - "operation": "set_license", - "key": "", - "company": "" -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/sql-operations.md b/versioned_docs/version-4.7/developers/operations-api/sql-operations.md deleted file mode 100644 index 4b7076bb..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/sql-operations.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -title: SQL Operations ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Operations - -## Select - -Executes the provided SQL statement. 
The SELECT statement is used to query data from the database. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -[ - { - "id": 1, - "age": 7, - "dog_name": "Penny", - "weight_lbs": 38, - "breed_id": 154, - "owner_name": "Kyle", - "adorable": true, - "__createdtime__": 1611614106043, - "__updatedtime__": 1611614119507 - } -] -``` - ---- - -## Insert - -Executes the provided SQL statement. The INSERT statement is used to add one or more rows to a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" -} -``` - -### Response: 200 - -```json -{ - "message": "inserted 1 of 1 records", - "inserted_hashes": [22], - "skipped_hashes": [] -} -``` - ---- - -## Update - -Executes the provided SQL statement. The UPDATE statement is used to change the values of specified attributes in one or more rows in a database table. - -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "update_hashes": [1], - "skipped_hashes": [] -} -``` - ---- - -## Delete - -Executes the provided SQL statement. The DELETE statement is used to remove one or more rows of data from a database table. 
- -- `operation` _(required)_ - must always be `sql` -- `sql` _(required)_ - use standard SQL - -### Body - -```json -{ - "operation": "sql", - "sql": "DELETE FROM dev.dog WHERE id = 1" -} -``` - -### Response: 200 - -```json -{ - "message": "1 of 1 record successfully deleted", - "deleted_hashes": [1], - "skipped_hashes": [] -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/system-operations.md b/versioned_docs/version-4.7/developers/operations-api/system-operations.md deleted file mode 100644 index d39e93cb..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/system-operations.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: System Operations ---- - -# System Operations - -## Restart - -Restarts the Harper instance. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart` - -### Body - -```json -{ - "operation": "restart" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting HarperDB. This may take up to 60 seconds." -} -``` - ---- - -## Restart Service - -Restarts servers for the specified Harper service. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `restart_service` -- `service` _(required)_ - must be one of: `http_workers`, `clustering_config` or `clustering` -- `replicated` _(optional)_ - must be a boolean. If set to `true`, Harper will replicate the restart service operation across all nodes in the cluster. The restart will occur as a rolling restart, ensuring that each node is fully restarted before the next node begins restarting. - -### Body - -```json -{ - "operation": "restart_service", - "service": "http_workers" -} -``` - -### Response: 200 - -```json -{ - "message": "Restarting http_workers" -} -``` - ---- - -## System Information - -Returns detailed metrics on the host system. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `system_information` -- `attributes` _(optional)_ - string array of top level attributes desired in the response, if no value is supplied all attributes will be returned. Available attributes are: ['system', 'time', 'cpu', 'memory', 'disk', 'network', 'harperdb_processes', 'table_size', 'metrics', 'threads', 'replication'] - -### Body - -```json -{ - "operation": "system_information" -} -``` - ---- - -## Set Status - -Sets a status value that can be used for application-specific status tracking. Status values are stored in memory and are not persisted across restarts. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `set_status` -- `id` _(required)_ - the key identifier for the status -- `status` _(required)_ - the status value to set (string between 1-512 characters) - -### Body - -```json -{ - "operation": "set_status", - "id": "primary", - "status": "active" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -### Notes - -- The `id` parameter must be one of the allowed status types: 'primary', 'maintenance', or 'availability' -- If no `id` is specified, it defaults to 'primary' -- For 'availability' status, only 'Available' or 'Unavailable' values are accepted -- For other status types, any string value is accepted - ---- - -## Get Status - -Retrieves a status value previously set with the set_status operation. 
- -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `get_status` -- `id` _(optional)_ - the key identifier for the status to retrieve (defaults to all statuses if not provided) - -### Body - -```json -{ - "operation": "get_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 -} -``` - -If no id parameter is provided, all status values will be returned: - -```json -[ - { - "id": "primary", - "status": "active", - "__createdtime__": 1621364589543, - "__updatedtime__": 1621364589543 - }, - { - "id": "maintenance", - "status": "scheduled", - "__createdtime__": 1621364600123, - "__updatedtime__": 1621364600123 - } -] -``` - ---- - -## Clear Status - -Removes a status entry by its ID. - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `clear_status` -- `id` _(required)_ - the key identifier for the status to remove - -### Body - -```json -{ - "operation": "clear_status", - "id": "primary" -} -``` - -### Response: 200 - -```json -{ - "message": "Status successfully cleared" -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/token-authentication.md b/versioned_docs/version-4.7/developers/operations-api/token-authentication.md deleted file mode 100644 index 178db842..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/token-authentication.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Token Authentication ---- - -# Token Authentication - -## Create Authentication Tokens - -Creates the tokens needed for authentication: operation & refresh token. 
- -_Note - this operation does not require authorization to be set_ - -- `operation` _(required)_ - must always be `create_authentication_tokens` -- `username` _(required)_ - username of user to generate tokens for -- `password` _(required)_ - password of user to generate tokens for - -### Body - -```json -{ - "operation": "create_authentication_tokens", - "username": "", - "password": "" -} -``` - -### Response: 200 - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA1MTUwMzQ5LCJzdWIiOiJvcGVyYXRpb24ifQ.TlV93BqavQVQntXTt_WeY5IjAuCshfd6RzhihLWFWhu1qEKLHdwg9o5Z4ASaNmfuyKBqbFw65IbOYKd348EXeC_T6d0GO3yUhICYWXkqhQnxVW_T-ECKc7m5Bty9HTgfeaJ2e2yW55nbZYWG_gLtNgObUjCziX20-gGGR25sNTRm78mLQPYQkBJph6WXwAuyQrX704h0NfvNqyAZSwjxgtjuuEftTJ7FutLrQSLGIBIYq9nsHrFkheiDSn-C8_WKJ_zATa4YIofjqn9g5wA6o_7kSNaU2-gWnCm_jbcAcfvOmXh6rd89z8pwPqnC0f131qHIBps9UHaC1oozzmu_C6bsg7905OoAdFFY42Vojs98SMbfRApRvwaS4SprBsam3izODNI64ZUBREu3l4SZDalUf2kN8XPVWkI1LKq_mZsdtqr1r11Z9xslI1wVdxjunYeanjBhs7_j2HTX7ieVGn1a23cWceUk8F1HDGe_KEuPQs03R73V8acq_freh-kPhIa4eLqmcHeBw3WcyNGW8GuP8kyQRkGuO5sQSzZqbr_YSbZdSShZWTWDE6RYYC9ZV9KJtHVxhs0hexUpcoqO8OtJocyltRjtDjhSm9oUxszYRaALu-h8YadZT9dEKzsyQIt30d7LS9ETmmGWx4nKSTME2bV21PnDv_rEc5R6gnE", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6IkhEQl9BRE1JTiIsImlhdCI6MTYwNTA2Mzk0OSwiZXhwIjoxNjA3NjU1OTQ5LCJzdWIiOiJyZWZyZXNoIn0.znhJhkdSROBPP_GLRzAxYdjgQ3BuqpAbQB7zMSSOQJ3s83HnmZ10Bnpw_3L2aF-tOFgz_t6HUAvn26fNOLsspJD2aOvHPcVS4yLKS5nagpA6ar_pqng9f6Ebfs8ohguLCfHnHRJ8poLxuWRvWW9_9pIlDiwsj4yo3Mbxi3mW8Bbtnk2MwiNHFxTksD12Ne8EWz8q2jic5MjArqBBgR373oYoWU1oxpTM6gIsZCBRowXcc9XFy2vyRoggEUU4ISRFQ4ZY9ayJ-_jleSDCUamJSNQsdb1OUTvc6CxeYlLjCoV0ijRUB6p2XWNVezFhDu8yGqOeyGFJzArhxbVc_pl4UYd5aUVxhrO9DdhG29cY_mHV0FqfXphR9QllK--LJFTP4aFqkCxnVr7HSa17hL0ZVK1HaKrx21PAdCkVNZpD6J3RtRbTkfnIB_C3Be9jhOV3vpTf7ZGn_Bs3CPJi_sL313Z1yKSDAS5rXTPceEOcTPHjzkMP9Wz19KfFq_0kuiZdDmeYNqJeFPAgGJ-S0tO51krzyGqLyCCA32_W104GR8OoQi2gEED6HIx2G0-1rnLnefN6eHQiY5r-Q3Oj9e2y3EvqqgWOmEDw88-SjPTwQVnMbBHYN2RfluU7EmvDh6Saoe79Lhlu8ZeSJ1x6ZgA8-Cirraz1_526Tn8v5FGDfrc" -} -``` - ---- - -## Refresh Operation Token - -This operation creates a new operation token. - -- `operation` _(required)_ - must always be `refresh_operation_token` -- `refresh_token` _(required)_ - the refresh token that was provided when tokens were created - -### Body - -```json -{ - "operation": "refresh_operation_token", - "refresh_token": "EXISTING_REFRESH_TOKEN" -} -``` - -### Response: 200 - -```json -{ - "operation_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ1MTc4Nzk1MjMsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMywiYWN0aXZlIjp0cnVlLCJhdXRoX3Rva2VuIjpudWxsLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDUxNzg3OTUyMSwiX191cGRhdGVkdGltZV9fIjoxNjA0NTE3ODc5NTIxLCJpZCI6IjZhYmRjNGJhLWU5MjQtNDlhNi1iOGY0LWM1NWUxYmQ0OTYzZCIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6IkhEQl9BRE1JTiJ9LCJpYXQiOjE2MDUwNjQ0MjMsImV4cCI6MTYwNTE1MDgyMywic3ViIjoib3BlcmF0aW9uIn0.VVZdhlh7_xFEaGPwhAh6VJ1d7eisiF3ok3ZwLTQAMWZB6umb2S7pPSTbXAmqAGHRlFAK3BYfnwT3YWt0gZbHvk24_0x3s_dej3PYJ8khIxzMjqpkR6qSjQIC2dhKqpwRPNtoqW_xnep9L-qf5iPtqk
wsqWhF1c5VSN8nFouLWMZSuJ6Mag04soNhFvY0AF6QiTyzajMTb6uurRMWOnxk8hwMrY_5xtupabqtZheXP_0DV8l10B7GFi_oWf_lDLmwRmNbeUfW8ZyCIJMj36bjN3PsfVIxog87SWKKCwbWZWfJWw0KEph-HvU0ay35deyGWPIaDQmujuh2vtz-B0GoIAC58PJdXNyQRzES_nSb6Oqc_wGZsLM6EsNn_lrIp3mK_3a5jirZ8s6Z2SfcYKaLF2hCevdm05gRjFJ6ijxZrUSOR2S415wLxmqCCWCp_-sEUz8erUrf07_aj-Bv99GUub4b_znOsQF3uABKd4KKff2cNSMhAa-6sro5GDRRJg376dcLi2_9HOZbnSo90zrpVq8RNV900aydyzDdlXkZja8jdHBk4mxSSewYBvM7up6I0G4X-ZlzFOp30T7kjdLa6480Qp34iYRMMtq0Htpb5k2jPt8dNFnzW-Q2eRy1wNBbH3cCH0rd7_BIGuTCrl4hGU8QjlBiF7Gj0_-uJYhKnhg" -} -``` diff --git a/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md b/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md deleted file mode 100644 index 91f222b9..00000000 --- a/versioned_docs/version-4.7/developers/operations-api/users-and-roles.md +++ /dev/null @@ -1,508 +0,0 @@ ---- -title: Users and Roles ---- - -# Users and Roles - -## List Roles - -Returns a list of all roles. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_roles` - -### Body - -```json -{ - "operation": "list_roles" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1611615061106, - "__updatedtime__": 1611615061106, - "id": "05c2ffcd-f780-40b1-9432-cfe8ba5ad890", - "permission": { - "super_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "role": "developer" - }, - { - "__createdtime__": 1610749235614, - "__updatedtime__": 1610749235614, - "id": "136f03fa-a0e9-46c3-bd5d-7f3e7dd5b564", - "permission": { - "cluster_user": true - }, - "role": "cluster_user" - }, - { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": 
"745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - } -] -``` - ---- - -## Add Role - -Creates a new role with the specified permissions. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_role` -- `role` _(required)_ - name of role you are defining -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. 
- -### Body - -```json -{ - "operation": "add_role", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": true, - "insert": true, - "update": true - } - ] - } - } - } - }, - "id": "0a9368b0-bd81-482f-9f5a-8722e3582f96", - "__updatedtime__": 1598549532897, - "__createdtime__": 1598549532897 -} -``` - ---- - -## Alter Role - -Modifies an existing role with the specified permissions. updates permissions from an existing role. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_role` -- `id` _(required)_ - the id value for the role you are altering -- `role` _(optional)_ - name value to update on the role you are altering -- `permission` _(required)_ - object defining permissions for users associated with this role: - - `super_user` _(optional)_ - boolean which, if set to true, gives users associated with this role full access to all operations and methods. If not included, value will be assumed to be false. - - `structure_user` _(optional)_ - boolean OR array of database names (as strings). If boolean, user can create new databases and tables. If array of strings, users can only manage tables within the specified databases. This overrides any individual table permissions for specified databases, or for all databases if the value is true. 
- -### Body - -```json -{ - "operation": "alter_role", - "id": "f92162e2-cd17-450c-aae0-372a76859038", - "role": "another_developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - } -} -``` - -### Response: 200 - -```json -{ - "id": "a7cb91e9-32e4-4dbf-a327-fab4fa9191ea", - "role": "developer", - "permission": { - "super_user": false, - "structure_user": false, - "dev": { - "tables": { - "dog": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "name", - "read": false, - "insert": true, - "update": true - } - ] - } - } - } - }, - "__updatedtime__": 1598549996106 -} -``` - ---- - -## Drop Role - -Deletes an existing role from the database. NOTE: Role with associated users cannot be dropped. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - this must always be `drop_role` -- `id` _(required)_ - this is the id of the role you are dropping - -### Body - -```json -{ - "operation": "drop_role", - "id": "developer" -} -``` - -### Response: 200 - -```json -{ - "message": "developer successfully deleted" -} -``` - ---- - -## List Users - -Returns a list of all users. 
[Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `list_users` - -### Body - -```json -{ - "operation": "list_users" -} -``` - -### Response: 200 - -```json -[ - { - "__createdtime__": 1635520961165, - "__updatedtime__": 1635520961165, - "active": true, - "role": { - "__createdtime__": 1635520961161, - "__updatedtime__": 1635520961161, - "id": "7c78ef13-c1f3-4063-8ea3-725127a78279", - "permission": { - "super_user": true, - "system": { - "tables": { - "hdb_table": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_attribute": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_schema": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_user": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_role": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_job": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_license": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_info": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_nodes": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - }, - "hdb_temp": { - "read": true, - "insert": false, - "update": false, - "delete": false, - "attribute_permissions": [] - } - } - } - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" - } -] -``` - ---- - -## User Info - -Returns user data for the associated user credentials. 
- -- `operation` _(required)_ - must always be `user_info` - -### Body - -```json -{ - "operation": "user_info" -} -``` - -### Response: 200 - -```json -{ - "__createdtime__": 1610749235611, - "__updatedtime__": 1610749235611, - "active": true, - "role": { - "__createdtime__": 1610749235609, - "__updatedtime__": 1610749235609, - "id": "745b3138-a7cf-455a-8256-ac03722eef12", - "permission": { - "super_user": true - }, - "role": "super_user" - }, - "username": "HDB_ADMIN" -} -``` - ---- - -## Add User - -Creates a new user with the specified role and credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `add_user` -- `role` _(required)_ - 'role' name value of the role you wish to assign to the user. See `add_role` for more detail -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash -- `password` _(required)_ - clear text for password. Harper will encrypt the password upon receipt -- `active` _(required)_ - boolean value for status of user's access to your Harper instance. If set to false, user will not be able to access your instance of Harper. - -### Body - -```json -{ - "operation": "add_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "hdb_user successfully added" -} -``` - ---- - -## Alter User - -Modifies an existing user's role and/or credentials. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `alter_user` -- `username` _(required)_ - username assigned to the user. It can not be altered after adding the user. It serves as the hash. -- `password` _(optional)_ - clear text for password. 
Harper will encrypt the password upon receipt -- `role` _(optional)_ - `role` name value of the role you wish to assign to the user. See `add_role` for more detail -- `active` _(optional)_ - status of user's access to your Harper instance. See `add_role` for more detail - -### Body - -```json -{ - "operation": "alter_user", - "role": "role_name", - "username": "hdb_user", - "password": "password", - "active": true -} -``` - -### Response: 200 - -```json -{ - "message": "updated 1 of 1 records", - "new_attributes": [], - "txn_time": 1611615114397.988, - "update_hashes": ["hdb_user"], - "skipped_hashes": [] -} -``` - ---- - -## Drop User - -Deletes an existing user by username. [Learn more about Harper roles here.](../security/users-and-roles) - -_Operation is restricted to super_user roles only_ - -- `operation` _(required)_ - must always be `drop_user` -- `username` _(required)_ - username assigned to the user - -### Body - -```json -{ - "operation": "drop_user", - "username": "sgoldberg" -} -``` - -### Response: 200 - -```json -{ - "message": "sgoldberg successfully deleted" -} -``` diff --git a/versioned_docs/version-4.7/developers/real-time.md b/versioned_docs/version-4.7/developers/real-time.md deleted file mode 100644 index 9c5c79e4..00000000 --- a/versioned_docs/version-4.7/developers/real-time.md +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: Real-Time ---- - -# Real-Time - -## Real-Time - -Harper provides real-time access to data and messaging. This allows clients to monitor and subscribe to data for changes in real-time as well as handling data-oriented messaging. Harper supports multiple standardized protocols to facilitate diverse standards-based client interaction. - -Harper real-time communication is based around database tables. Declared tables are the basis for monitoring data, and defining "topics" for publishing and subscribing to messages. 
Declaring a table that establishes a topic can be as simple as adding a table with no attributes to your [schema.graphql in a Harper application folder](./applications/): - -``` -type MyTopic @table @export -``` - -You can then subscribe to records or sub-topics in this topic/namespace, as well as save data and publish messages, with the protocols discussed below. - -### Content Negotiation - -Harper is a database, not a generic broker, and therefore highly adept at handling _structured_ data. Data can be published and subscribed in all supported structured/object formats, including JSON, CBOR, and MessagePack, and the data will be stored and handled as structured data. This means that different clients can individually choose which format they prefer, both for inbound and outbound messages. One client could publish in JSON, and another client could choose to receive messages in CBOR. - -## Protocols - -### MQTT - -Harper supports MQTT as an interface to this real-time data delivery. It is important to note that MQTT in Harper is not just a generic pub/sub hub, but is deeply integrated with the database providing subscriptions directly to database records, and publishing to these records. In this document we will explain how MQTT pub/sub concepts are aligned and integrated with database functionality. - -#### Configuration - -Harper supports MQTT with its `mqtt` server module and Harper supports MQTT over standard TCP sockets or over WebSockets. 
This is enabled by default, but can be configured in your `harperdb-config.yaml` configuration, allowing you to change which ports it listens on, if secure TLS connections are used, and MQTT is accepted over WebSockets: - -```yaml -mqtt: - network: - port: 1883 - securePort: 8883 # for TLS - webSocket: true # will also enable WS support through the default HTTP interface/port - mTLS: false - requireAuthentication: true -``` - -Note that if you are using WebSockets for MQTT, the sub-protocol should be set to "mqtt" (this is required by the MQTT specification, and should be included by any conformant client): `Sec-WebSocket-Protocol: mqtt`. mTLS is also supported by enabling it in the configuration and using the certificate authority from the TLS section of the configuration. See the [configuration documentation for more information](../deployments/configuration). - -#### Capabilities - -Harper's MQTT capabilities includes support for MQTT versions v3.1 and v5 with standard publish and subscription capabilities with multi-level topics, QoS 0 and 1 levels, and durable (non-clean) sessions. MQTT supports QoS 2 interaction, but doesn't guarantee exactly once delivery (although any guarantees of exactly once over unstable networks is a fictional aspiration). MQTT doesn't currently support last will, nor single-level wildcards (only multi-level wildcards). - -### Topics - -In MQTT, messages are published to, and subscribed from, topics. In Harper topics are aligned with resource endpoint paths in exactly the same way as the REST endpoints. If you define a table or resource in your schema, with a path/endpoint of "my-resource", that means that this can be addressed as a topic just like a URL path. So a topic of "my-resource/some-id" would correspond to the record in the my-resource table (or custom resource) with a record id of "some-id". 
- -This means that you can subscribe to "my-resource/some-id" and making this subscription means you will receive notification messages for any updates to this record. If this record is modified or deleted, a message will be sent to listeners of this subscription. - -The current value of this record is also treated as the "retained" message for this topic. When you subscribe to "my-resource/some-id", you will immediately receive the record for this id, through a "publish" command from the server, as the initial "retained" message that is first delivered. This provides a simple and effective way to get the current state of a record and future updates to that record without having to worry about timing issues of aligning a retrieval and subscription separately. - -Similarly, publishing a message to a "topic" also interacts with the database. Publishing a message with "retain" flag enabled is interpreted as an update or put to that record. The published message will replace the current record with the contents of the published message. - -If a message is published without a `retain` flag, the message will not alter the record at all, but will still be published to any subscribers to that record. - -Harper supports QoS 0 and 1 for publishing and subscribing. - -Harper supports multi-level topics, both for subscribing and publishing. Harper also supports multi-level wildcards, so you can subscribe to /`my-resource/#` to receive notifications for `my-resource/some-id` as well as `my-resource/nested/id`, or you can subscribe to `my-resource/nested/#` and receive the latter, but not the former, topic messages. Harper currently only supports trailing multi-level wildcards (no single-level wildcards with '\*'). - -#### Events - -JavaScript components can also listen for MQTT events. This is available on the server.mqtt.events object. 
For example, to set up a listener/callback for when MQTT clients connect and authorize, we can do: - -```javascript -server.mqtt.events.on('connected', (session, socket) => { - console.log('client connected with id', session.clientId); -}); -``` - -The following MQTT events are available: - -- `connection` - When a client initially establishes a TCP or WS connection to the server -- `connected` - When a client establishes an authorized MQTT connection -- `auth-failed` - When a client fails to authenticate -- `disconnected` - When a client disconnects from the server - -### Ordering - -Harper is designed to be a distributed database, and an intrinsic characteristic of distributed servers is that messages may take different amounts of time to traverse the network and may arrive in a different order depending on server location and network topology. Harper is designed for distributed data with minimal latency, and so messages are delivered to subscribers immediately when they arrive, Harper does not delay messages for coordinating confirmation or consensus among other nodes, which would significantly increase latency, messages are delivered as quickly as possible. - -As an example, let's consider message #1 is published to node A, which then sends the message to node B and node C, but the message takes a while to get there. Slightly later, while the first message is still in transit, message #2 is published to node B, which then replicates it to A and C, and because of network conditions, message #2 arrives at node C before message #1. Because Harper prioritizes low latency, when node C receives message #2, it immediately publishes it to all its local subscribers (it has no knowledge that message #1 is in transit). 
- -When message #1 is received by node C, the behavior of what it does with this message is dependent on whether the message is a "retained" message (was published with a retain flag set to true, or was put/update/upsert/inserted into the database) or was a non-retained message. In the case of a non-retained message, this message will be delivered to all local subscribers (even though it had been published earlier), thereby prioritizing the delivery of every message. On the other hand, a retained message will not deliver the earlier out-of-order message to clients, and Harper will keep the message with the latest timestamp as the "winning" record state (and will be retained message for any subsequent subscriptions). Retained messages maintain (eventual) consistency across the entire cluster of servers, all nodes will converge to the same message as the being the latest and retained message (#2 in this case). - -Non-retained messages are generally a good choice for applications like chat, where every message needs to be delivered even if they might arrive out-of-order (the order may not be consistent across all servers). Retained messages can be thought of a "superseding" messages, and are a good fit for applications like instrument measurements like temperature readings, where the priority to provide the _latest_ temperature and older temperature readings are not important to publish after a new reading, and consistency of the most-recent record (across the network) is important. - -### WebSockets - -WebSockets are supported through the REST interface and go through the `connect(incomingMessages)` method on resources. By default, making a WebSockets connection to a URL will subscribe to the referenced resource. For example, making a WebSocket connection to `new WebSocket('wss://server/my-resource/341')` will access the resource defined for 'my-resource' and the resource id of 341 and connect to it. 
On the web platform this could be: - -```javascript -let ws = new WebSocket('wss://server/my-resource/341'); -ws.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -By default, the resources will make a subscription to that resource, monitoring any changes to the records or messages published to it, and will return events on the WebSockets connection. You can also override `connect(incomingMessages)` with your own handler. The `connect` method simply needs to return an iterable (asynchronous iterable) that represents the stream of messages to be sent to the client. One easy way to create an iterable stream is to define the `connect` method as a generator and `yield` messages as they become available. For example, a simple WebSockets echo server for a resource could be written: - -```javascript -export class Echo extends Resource { - async *connect(incomingMessages) { - for await (let message of incomingMessages) { // wait for each incoming message from the client - // and send the message back to the client - yield message; - } - } -``` - -You can also call the default `connect` and it will provide a convenient streaming iterable with events for the outgoing messages, with a `send` method that you can call to send messages on the iterable, and a `close` event for determining when the connection is closed. 
The incoming messages iterable is also an event emitter, and you can listen for `data` events to get the incoming messages using event style: - -```javascript -export class Example extends Resource { - connect(incomingMessages) { - let outgoingMessages = super.connect(); - let timer = setInterval(() => { - outgoingMessages.send({greeting: 'hi again!'}); - }, 1000); // send a message once a second - incomingMessages.on('data', (message) => { - // another way of echo-ing the data back to the client - outgoingMessages.send(message); - }); - outgoingMessages.on('close', () => { - // make sure we end the timer once the connection is closed - clearInterval(timer); - }); - return outgoingMessages; - } -``` - -### Server Sent Events - -Server Sent Events (SSE) are also supported through the REST server interface, and provide a simple and efficient mechanism for web-based applications to receive real-time updates. For consistency of push delivery, SSE connections go through the `connect()` method on resources, much like WebSockets. The primary difference is that `connect` is called without any `incomingMessages` argument, since SSE is a one-directional transport mechanism. This can be used much like WebSockets, specifying a resource URL path will connect to that resource, and by default provides a stream of messages for changes and messages for that resource. 
For example, you can connect to receive notification in a browser for a resource like: - -```javascript -let eventSource = new EventSource('https://server/my-resource/341', { withCredentials: true }); -eventSource.onmessage = (event) => { - // received a notification from the server - let data = JSON.parse(event.data); -}; -``` - -### MQTT Feature Support Matrix - -| Feature | Support | -| ------------------------------------------------------------------ | -------------------------------------------------------------- | -| Connections, protocol negotiation, and acknowledgement with v3.1.1 | :heavy_check_mark: | -| Connections, protocol negotiation, and acknowledgement with v5 | :heavy_check_mark: | -| Secure MQTTS | :heavy_check_mark: | -| MQTTS over WebSockets | :heavy_check_mark: | -| MQTT authentication via user/pass | :heavy_check_mark: | -| MQTT authentication via mTLS | :heavy_check_mark: | -| Publish | :heavy_check_mark: | -| Subscribe | :heavy_check_mark: | -| Multi-level wildcard | :heavy_check_mark: | -| Single-level wildcard | :heavy_check_mark: | -| QoS 0 | :heavy_check_mark: | -| QoS 1 | :heavy_check_mark: | -| QoS 2 | Not fully supported, can perform conversation but does persist | -| Keep-Alive monitoring | :heavy_check_mark: | -| Clean session | :heavy_check_mark: | -| Durable session | :heavy_check_mark: | -| Distributed durable session | | -| Will | :heavy_check_mark: | -| MQTT V5 User properties | | -| MQTT V5 Will properties | | -| MQTT V5 Connection properties | | -| MQTT V5 Connection acknowledgement properties | | -| MQTT V5 Publish properties | | -| MQTT V5 Subscribe properties retain handling | :heavy_check_mark: | -| MQTT V5 Subscribe properties | | -| MQTT V5 Ack properties | | -| MQTT V5 AUTH command | | -| MQTT V5 Shared Subscriptions | | diff --git a/versioned_docs/version-4.7/developers/replication/index.md b/versioned_docs/version-4.7/developers/replication/index.md deleted file mode 100644 index 7099a937..00000000 --- 
a/versioned_docs/version-4.7/developers/replication/index.md +++ /dev/null @@ -1,300 +0,0 @@ ---- -title: Replication/Clustering ---- - -# Replication/Clustering - -Harper’s replication system is designed to make distributed data replication fast and reliable across multiple nodes. This means you can easily build a distributed database that ensures high availability, disaster recovery, and data localization. The best part? It’s simple to set up, configure, and manage. You can easily add or remove nodes, choose which data to replicate, and monitor the system’s health without jumping through hoops. - -### Replication Overview - -Harper replication uses a peer-to-peer model where every node in your cluster can send and subscribe to data. Each node connects through WebSockets, allowing data to flow seamlessly in both directions. By default, Harper takes care of managing these connections and subscriptions, so you don’t have to worry about data consistency. The system is designed to maintain secure, reliable connections between nodes, ensuring that your data is always safe. - -### Replication Configuration - -To connect your nodes, you need to provide hostnames or URLs for the nodes to connect to each other. This can be done via configuration or through operations. To configure replication, you can specify connection information the `replication` section of the [harperdb-config.yaml](../deployments/configuration). Here, you can specify the host name of the current node, and routes to connect to other nodes, for example: - -```yaml -replication: - hostname: server-one - routes: - - server-two - - server-three -``` - -In this example, the current node is `server-one`, and it will connect to `server-two` and `server-three`. 
Routes to other nodes can also be configured with URLs or ports: - -```yaml -replication: - hostname: server-one - routes: - - wss://server-two:9933 # URL based route - - hostname: server-three # define a hostname and port - port: 9933 -``` - -You can also use the [operations API](./operations-api/clustering) to dynamically add and remove nodes from the cluster. This is useful for adding new nodes to a running cluster or removing nodes that are no longer needed. For example (note this is the basic form, you would also need to provide the necessary credentials for the operation, see the section on securing connections for more details): - -```json -{ - "operation": "add_node", - "hostname": "server-two" -} -``` - -These operations will also dynamically generating certificates as needed, if there are no existing signed certificates, or if the existing certificates are not valid for the new node. - -Harper will also automatically replicate node information to other nodes in a cluster ([gossip-style discovery](https://highscalability.com/gossip-protocol-explained/)). This means that you only need to connect to one node in an existing cluster, and Harper will automatically detect and connect to other nodes in the cluster (bidirectionally). - -By default, Harper will replicate all the data in all the databases. You can configure which databases are replicated, and then override this behavior on a per-table basis. For example, you can indicate which databases should be replicated by default, here indicating you want to replicate the `data` and `system` databases: - -```yaml -replication: - databases: - - data - - system -``` - -By default, all tables within a replicated database will be replicated. Transactions are replicated atomically, which may involve data across multiple tables. 
However, you can also configure replication for individual tables, and disable and exclude replication for specific tables in a database by setting `replicate` to `false` in the table definition: - -```graphql -type LocalTableForNode @table(replicate: false) { - id: ID! - name: String! -} -``` - -You can also control which nodes data is replicated to, and how many nodes data is replicated to. By default, Harper will replicate data to all nodes in the cluster, but you can control where data is replicated to with the [sharding configuration and APIs](replication/sharding). - -By default, replication connects to the secure port 9933. You can configure the replication port in the `replication` section. - -```yaml -replication: - securePort: 9933 -``` - -### Securing Connections - -Harper supports the highest levels of security through public key infrastructure based security and authorization. Replication connections use WebSocket protocol and support multiple authentication methods depending on your security configuration: - -- **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs) -- **IP-based authentication** (for development/testing): Nodes are identified by their IP address when using insecure connections (see [Insecure Connection IP-based Authentication](#insecure-connection-ip-based-authentication) below) - -When using certificate-based authentication, Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to check if certificates have been revoked. This ensures that compromised certificates cannot be used for replication connections. 
OCSP and CRL verification works automatically with certificates from public certificate authorities (like Let's Encrypt or DigiCert) when `enableRootCAs` is enabled, as these certificates include the necessary OCSP responder URLs and CRL distribution points. For self-signed certificates or private CAs that don't support OCSP/CRL, you can use Harper's manual certificate revocation feature (see [Revoking Certificates](#revoking-certificates) below). Certificate verification settings follow the same configuration as HTTP mTLS connections (see [certificate verification configuration](/docs/deployments/configuration#http)). - -#### Provide your own certificates - -If you want to secure your Harper connections with your own signed certificates, you can easily do so. Whether you have certificates from a public authority (like Let's Encrypt or Digicert) or a corporate certificate authority, you can use them to authenticate nodes securely. You can then allow nodes to authorize each other by checking the certificate against the standard list of root certificate authorities by enabling the `enableRootCAs` option in the config: - -``` -replication - enableRootCAs: true -``` - -And then just make sure the certificate’s common name (CN) matches the node's hostname. - -#### Setting Up Custom Certificates - -There are two ways to configure Harper with your own certificates: - -1. Use the `add_certificate` operation to upload them. -1. Or, specify the certificate paths directly in the `replication` section of the `harperdb-config.yaml` file. - -If your certificate is signed by a trusted public authority, just provide the path to the certificate and private key. 
If you're using self-signed certificates or a private certificate authority, you’ll also need to provide the certificate authority (CA) details to complete the setup.\ -\ -Example configuration: - -```yaml -tls: - certificate: /path/to/certificate.pem - certificateAuthority: /path/to/ca.pem - privateKey: /path/to/privateKey.pem -``` - -With this in place, Harper will load the provided certificates into the certificate table and use these to secure and authenticate connections between nodes. - -You have the option to skip providing a specific certificate authority (CA) and instead verify your certificate against the root certificates included in the bundled Mozilla CA store. This bundled CA store, provided by Node.js, is a snapshot of Mozilla's CA certificates that is fixed at the time of each Node.js release. - -To enable the root certificates set `replication.enableRootCAs` to `true` in the `harperdb-config.yaml` file: - -```yaml -replication: - enableRootCAs: true -``` - -#### Cross-generated certificates - -Harper can also generate its own certificates for secure connections. This is useful for setting up secure connections between nodes when no existing certificates are available, and can be used in development, testing, or production environments. Certificates will be automatically requested and signed between nodes to support a form of distributed certificate generation and signing. To establish secure connections between nodes using cross-generated certificates, you simply use the [`add_node` operation](./operations-api/clustering) over SSL, and specify the temporary authentication credentials to use for connecting and authorizing the certificate generation and signing. 
\ -\ -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "verify_tls": false, - "authorization": { - "username": "admin", - "password": "password" - } -} -``` - -When you connect to another node (e.g., `server-two`), Harper uses secure WebSockets and the provided credentials to establish the connection. - -If you’re working with a fresh install, you’ll need to set `verify_tls` to `false` temporarily, so the self-signed certificate is accepted. Once the connection is made, Harper will automatically handle the certificate signing process: - -- It creates a certificate signing request (CSR), sends it to `server-two`, which then signs it and returns the signed certificate along with the certificate authority (CA). -- The signed certificate is stored for future connections between the nodes, ensuring secure communication. - -**Important:** Your credentials are not stored—they are discarded immediately after use. - -You can also provide credentials in HTTP Authorization format (Basic auth, Token auth, or JWT). This is helpful for handling authentication with the required permissions to generate and sign certificates. - -Additionally, you can use `set_node` as an alias for the `add_node` operation if you prefer. - -#### Revoking Certificates - -Certificates used in replication can be revoked by using the certificate serial number and either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config in `harperdb-config.yaml`. - -To utilize the `revoked_certificates` attribute in the `hdb_nodes` table, you can use the `add_node` or `update_node` operation to add the certificate serial number to the `revoked_certificates` array. For example: - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "revoked_certificates": ["1769F7D6A"] -} -``` - -To utilize the replication route config in `harperdb-config.yaml`, you can add the certificate serial number to the `revokedCertificates` array. 
For example: - -```yaml -replication: - routes: - - hostname: server-three - port: 9930 - revokedCertificates: - - 1769F7D6A - - QA69C7E2S -``` - -#### Removing Nodes - -Nodes can be removed from the cluster using the [`remove_node` operation](./operations-api/clustering). This will remove the node from the cluster, and stop replication to and from the node. For example: - -```json -{ - "operation": "remove_node", - "hostname": "server-two" -} -``` - -#### Insecure Connection IP-based Authentication - -You can completely disable secure connections and use IP addresses to authenticate nodes with each other. This can be useful for development and testing, or within a secure private network, but should never be used for production with publicly accessible servers. To disable secure connections, simply configure replication within an insecure port, either by [configuring the operations API](../deployments/configuration) to run on an insecure port or replication to run on an insecure port. And then set up IP-based routes to connect to other nodes: - -```yaml -replication: - port: 9933 - routes: - - 127.0.0.2 - - 127.0.0.3 -``` - -Note that in this example, we are using loop back addresses, which can be a convenient way to run multiple nodes on a single machine for testing and development. - -### Controlled Replication Flow - -By default, Harper will replicate all data in all databases, with symmetric bi-directional flow between nodes. However, there are times when you may want to control the replication flow between nodes, and dictate that data should only be replicated in one direction between certain nodes. This can be done by setting the direction in the `replicates` attribute of the node definition when adding the node or configuring the replication route. 
For example, to configure a node to only send data to `node-two` (which only receives), and only receive data from `node-three` (which only sends) you can add the following to the replication route: - -```yaml -replication: - databases: - - data - routes: - - host: node-two - replicates: - sends: false - receives: true - - host: node-three - replicates: - sends: true - receives: false -``` - -When using controlled flow replication, you will typically have different route configurations for each node to every other node. In that case, typically you do want to ensure that you are _not_ replicating the `system` database, since the `system` database contains the node configurations, and replicating the `system` database will cause all nodes to be replicated and have identical route configurations. - -#### Explicit Subscriptions - -By default, Harper automatically handles connections and subscriptions between nodes, ensuring data consistency across your cluster. It even uses data routing to manage node failures. However, you can manage these connections manually by explicitly subscribing to nodes. This should _not_ be used for production replication and should be avoided and exists only for testing, debugging, and legacy migration. This will likely be removed in V5. If you choose to manage subscriptions manually, Harper will no longer handle data consistency for you. This means there’s no guarantee that all nodes will have consistent data if subscriptions don’t fully replicate in all directions. If a node goes down, it’s possible that some data wasn’t replicated before the failure. If you want single direction replication, you can use controlled replication flow described above. - -#### How to Subscribe to Nodes - -To explicitly subscribe to a node, you can use operations like `add_node` and define the subscriptions. 
For example, you can configure a node (e.g., `server-two`) to publish transactions on a specific table (e.g., `dev.my-table`) without receiving data from that node. - -Example configuration: - -```json -{ - "operation": "add_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": false - } - ] -} -``` - -To update an explicit subscription you can use the [`update_node` operation](./operations-api/clustering). - -Here we are updating the subscription to receive transactions on the `dev.my-table` table from the `server-two` node. - -```json -{ - "operation": "update_node", - "hostname": "server-two", - "subscriptions": [ - { - "database": "dev", - "table": "my-table", - "publish": true, - "subscribe": true - } - ] -} -``` - -#### Monitoring Replication - -You can monitor the status of replication through the operations API. You can use the [`cluster_status` operation](./operations-api/clustering) to get the status of replication. For example: - -```json -{ - "operation": "cluster_status" -} -``` - -#### Database Initial Synchronization and Resynchronization - -When a new node is added to the cluster, if its database has not previously been synced, it will initially download the database from the first node it connects to. This will copy every record from the source database to the new node. Once the initial synchronization is complete, the new node will enter replication mode and receive records from each node as they are created, updated, or deleted. If a node goes down and comes back up, it will also resynchronize with the other nodes in the cluster, to ensure that it has the most up-to-date data. - -You may also specify a `start_time` in the `add_node` to specify that when a database connects, that it should not download the entire database, but only data since a given starting time. 
- -**Advanced Configuration** - -You can also check the configuration of the replication system, including the current known nodes and certificates, by querying the hdb_nodes and hdb_certificate table: - -```json -{ - "operation": "search_by_value", - "database": "system", - "table": "hdb_nodes", - "attribute": "name", - "value": "*" -} -``` diff --git a/versioned_docs/version-4.7/developers/replication/sharding.md b/versioned_docs/version-4.7/developers/replication/sharding.md deleted file mode 100644 index 307e38f1..00000000 --- a/versioned_docs/version-4.7/developers/replication/sharding.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: Sharding ---- - -Harper's replication system supports various levels of replication or sharding. Harper can be configured or set up to replicate different data to different subsets of nodes. This can be used to facilitate horizontal scalability of storage and write performance, while maintaining optimal strategies of data locality and data consistency. When sharding is configured, Harper will replicate data to only a subset of nodes, based on the sharding configuration, and can then retrieve data from the appropriate nodes as needed to fulfill requests for data. - -There are two main ways to set up sharding in Harper. The first approach is to use dynamic sharding, where the location or residency of records is determined dynamically based on where the record was written and record data, and records can be dynamically relocated based on where they are accessed. This residency information can be specific to each record, and can vary based on the computed residency and where the data is written and accessed. - -The second approach is to define specific shards, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed, or content. 
This approach is more static, but can be more efficient for certain use cases, and means that the location of data can always be predictably determined based on the primary key. - -## Configuration For Dynamic Sharding - -By default, Harper will replicate all data to all nodes. However, replication can easily be configured for "sharding", or storing different data in different locations or nodes. The simplest way to configure sharding and limit replication to improve performance and efficiency is to configure a replication-to count. This will limit the number of nodes that data is replicated to. For example, to specify that writes should replicate to 2 other nodes besides the node that first stored the data, you can set the `replicateTo` to 2 in the `replication` section of the `harperdb-config.yaml` file: - -```yaml -replication: - replicateTo: 2 -``` - -This will ensure that data is replicated to two other nodes, so that each record will be stored on three nodes in total. - -With a sharding configuration (or customization below) in place, requests for records that don't reside on the server handling requests will automatically be forwarded to the appropriate node. This will be done transparently, so that the client will not need to know where the data is stored. - -## Replication Control with Headers - -With the REST interface, replication levels and destinations can also be specified with the `X-Replicate-To` header. This can be used to indicate the number of additional nodes that data should be replicated to, or to specify the nodes that data should be replicated to. The `X-Replicate-To` header can be used with the `POST` and `PUT` methods. This header can also specify if the response should wait for confirmation from other nodes, and how many, with the `confirm` parameter. 
For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: 2;confirm=1 - -... -``` - -You can also explicitly specify destination nodes by providing a comma-separated list of node hostnames. For example, to specify that data should be replicated to nodes `node1` and `node2`, you can use the following header: - -```http -PUT /MyTable/3 -X-Replicate-To: node1,node2 -``` - -(This can also be used with the `confirm` parameter.) - -## Replication Control with Operations - -Likewise, you can specify replicateTo and confirm parameters in the operation object when using the Harper API. For example, to specify that data should be replicated to two other nodes, and the response should be returned once confirmation is received from one other node, you can use the following operation object: - -```json -{ - "operation": "update", - "schema": "dev", - "table": "MyTable", - "hashValues": [3], - "record": { - "name": "John Doe" - }, - "replicateTo": 2, - "replicatedConfirmation": 1 -} -``` - -or you can specify nodes: - -```jsonc -{ - // ... - "replicateTo": ["node-1", "node-2"], - // ... -} -``` - -## Programmatic Replication Control - -Additionally, you can specify `replicateTo` and `replicatedConfirmation` parameters programmatically in the context of a resource. For example, you can define a put method: - -```javascript -class MyTable extends tables.MyTable { - put(record) { - const context = this.getContext(); - context.replicateTo = 2; // or an array of node names - context.replicatedConfirmation = 1; - return super.put(record); - } -} -``` - -## Configuration for Static Sharding - -Alternatively, you can configure static sharding, where each node is assigned to a specific shard, and each record is replicated to the nodes in that shard based on the primary key. 
The `shard` is identified by a number. To configure the shard for each node, you can specify the shard number in the `replication`'s `shard` in the configuration: - -```yaml -replication: - shard: 1 -``` - -Alternatively, you can configure the `shard` under the `replication` `routes`. This allows you to assign a specific shard id based on the routing configuration. - -```yaml -replication: - routes: - - hostname: node1 - shard: 1 - - hostname: node2 - shard: 2 -``` - -Or you can specify a `shard` number by including that property in an `add_node` operation or `set_node` operation, to dynamically assign a node to a shard. - -You can then specify shard number in the `setResidency` or `setResidencyById` functions below. - -## Custom Sharding - -You can also define a custom sharding strategy by specifying a function to compute the "residency" or location of where records should be stored and reside. To do this we use the `setResidency` method, providing a function that will determine the residency of each record. The function you provide will be called with the record entry, and should return an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the `id` field, you can use the following code: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? ['node1'] : ['node2']; -}); -``` - -With this approach, the record metadata, which includes the residency information, and any indexed properties, will be replicated to all nodes, but the full record will only be replicated to the nodes specified by the residency function. - -The `setResidency` function can alternately return a shard number, which will replicate the data to all the nodes in that shard: - -```javascript -MyTable.setResidency((record) => { - return record.id % 2 === 0 ? 
1 : 2; -}); -``` - -### Custom Sharding By Primary Key - -Alternately you can define a custom sharding strategy based on the primary key alone. This allows records to be retrieved without needing access to the record data or metadata. With this approach, data will only be replicated to the nodes specified by the residency function (the record metadata doesn't need to be replicated to all nodes). To do this, you can use the `setResidencyById` method, providing a function that will determine the residency or shard of each record based on the primary key. The function you provide will be called with the primary key, and should return a `shard` number or an array of nodes that the record should be replicated to (using their hostname). For example, to shard records based on the value of the primary key, you can use the following code: - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? 1 : 2; // return shard number -}); -``` - -or - -```javascript -MyTable.setResidencyById((id) => { - return id % 2 === 0 ? ['node1'] : ['node2']; // return array of node hostnames -}); -``` - -### Disabling Cross-Node Access - -Normally sharding allows data to be stored in specific nodes, but still allows access to the data from any node. However, you can also disable cross-node access so that data is only returned if it is stored on the node where it is accessed. 
To do this, you can set the `replicateFrom` property on the context of operation to `false`: - -```json -{ - "operation": "search_by_id", - "table": "MyTable", - "ids": [3], - "replicateFrom": false -} -``` - -Or use a header with the REST API: - -```http -GET /MyTable/3 -X-Replicate-From: none -``` diff --git a/versioned_docs/version-4.7/developers/rest.md b/versioned_docs/version-4.7/developers/rest.md deleted file mode 100644 index 7e085d8e..00000000 --- a/versioned_docs/version-4.7/developers/rest.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: REST ---- - -# REST - -## REST - -Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation (for HTTP interactions), providing the best performance and HTTP interoperability with different clients. - -Resources, including tables, can be configured as RESTful endpoints. Make sure you review the [application introduction](applications/) and [defining schemas](applications/defining-schemas) to properly define your schemas and select which tables are exported and available through REST interface, as tables are not exported by default. The name of the [exported](applications/defining-schemas#export) resource defines the basis of the endpoint path available at the application HTTP server port [configured here](../deployments/configuration#http) (the default being `9926`). From there, a record id or query can be appended. Following uniform interface principles, HTTP methods define different actions with resources. For each method, this describes the default action. - -The default path structure provides access to resources at several levels: - -- `/my-resource` - The root path of a resource usually has a description of the resource (like a describe operation for a table). -- `/my-resource/` - The trailing slash in a path indicates it is a collection of the records. 
The root collection for a table represents all the records in a table, and usually you will append query parameters to query and search for more specific records. -- `/my-resource/record-id` - This resource locator represents a specific record, referenced by its id. This is typically how you can retrieve, update, and delete individual records. -- `/my-resource/record-id/` - Again, a trailing slash indicates a collection; here it is the collection of the records that begin with the specified id prefix. -- `/my-resource/record-id/with/multiple/parts` - A record id can consist of multiple path segments. - -### GET - -These can be used to retrieve individual records or perform searches. This is handled by the Resource method `get()` (and can be overridden). - -#### `GET /my-resource/` - -This can be used to retrieve a record by its primary key. The response will include the record as the body. - -##### Caching/Conditional Requests - -A `GET` response for a record will include an encoded version, a timestamp of the last modification, of this record in the `ETag` request headers (or any accessed record when used in a custom get method). On subsequent requests, a client (that has a cached copy) may include an `If-None-Match` request header with this tag. If the record has not been updated since this date, the response will have a 304 status and no body. This facilitates significant performance gains since the response data doesn't need to be serialized and transferred over the network. - -#### `GET /my-resource/?property=value` - -This can be used to search for records by the specified property name and value. See the querying section for more information. - -#### `GET /my-resource/.property` - -This can be used to retrieve the specified property of the specified record. Note that this will only work for properties that are declared in the schema. 
- -### PUT - -This can be used to create or update a record with the provided object/data (similar to an "upsert") with a specified key. This is handled by the Resource method `put(record)`. - -#### `PUT /my-resource/` - -This will create or update the record with the URL path that maps to the record's primary key. The record will be replaced with the contents of the data in the request body. The new record will exactly match the data that was sent (this will remove any properties that were present in the previous record and not included in the body). Future GETs will return the exact data that was provided by PUT (what you PUT is what you GET). For example: - -```http -PUT /MyTable/123 -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create or replace the record with a primary key of "123" with the object defined by the JSON in the body. This is handled by the Resource method `put()`. - -### DELETE - -This can be used to delete a record or records. - -### `DELETE /my-resource/` - -This will delete a record with the given primary key. This is handled by the Resource's `delete` method. For example: - -```http -DELETE /MyTable/123 -``` - -This will delete the record with the primary key of "123". - -### `DELETE /my-resource/?property=value` - -This will delete all the records that match the provided query. - -### POST - -Generally the POST method can be used for custom actions since POST has the broadest semantics. For tables that are exposed as endpoints, this also can be used to create new records. - -#### `POST /my-resource/` - -This is handled by the Resource method `post(data)`, which is a good method to extend to make various other types of modifications. 
Also, with a table you can create a new record without specifying a primary key, for example: - -```http -POST /MyTable/ -Content-Type: application/json - -{ "name": "some data" } -``` - -This will create a new record, auto-assigning a primary key, which will be returned in the `Location` header. - -### Querying through URL query parameters - -URL query parameters provide a powerful language for specifying database queries in Harper. This can be used to search by a single attribute name and value, to find all records which provide value for the given property/attribute. It is important to note that this attribute must be configured to be indexed to search on it. For example: - -```http -GET /my-resource/?property=value -``` - -We can specify multiple properties that must match: - -```http -GET /my-resource/?property=value&property2=another-value -``` - -Note that only one of the attributes needs to be indexed for this query to execute. - -We can also specify different comparators such as less than and greater than queries using [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax. If we want to specify records with an `age` value greater than 20: - -```http -GET /my-resource/?age=gt=20 -``` - -Or less than or equal to 20: - -```http -GET /my-resource/?age=le=20 -``` - -The comparison operators include standard FIQL operators, `lt` (less than), `le` (less than or equal), `gt` (greater than), `ge` (greater than or equal), and `ne` (not equal). These comparison operators can also be combined with other query parameters with `&`. For example, if we wanted products with a category of software and price between 100 and 200, we could write: - -```http -GET /Product/?category=software&price=gt=100&price=lt=200 -``` - -Comparison operators can also be used on Date fields, however, we have to ensure that the date format is properly escaped. 
For example, if we are looking for a listing date greater than `2017-03-08T09:30:00.000Z` we must escape the colons as `%3A`: - -``` -GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z -``` - -You can also search for attributes that start with a specific string, by using the == comparator and appending a `*` to the attribute value: - -```http -GET /Product/?name==Keyboard* -``` - -**Chained Conditions** - -You can also specify that a range condition must be met for a single attribute value by chaining conditions. This is done by omitting the name in the name-value pair. For example, to find products with a price between 100 and 200, you could write: - -```http -GET /Product/?price=gt=100<=200 -``` - -Chaining can be used to combine `gt` or `ge` with `lt` or `le` to specify a range of values. Currently, no other types of chaining are supported. - -Note that some HTTP clients may be overly aggressive in encoding query parameters, and you may need to disable extra encoding of query parameters, to ensure operators are passed through without manipulation. - -Here is a full list of the supported FIQL-style operators/comparators: - -- `==`: equal -- `=lt=`: less than -- `=le=`: less than or equal -- `=gt=`: greater than -- `=ge=`: greater than or equal -- `=ne=`, !=: not equal -- `=ct=`: contains the value (for strings) -- `=sw=`, `==*`: starts with the value (for strings) -- `=ew=`: ends with the value (for strings) -- `=`, `===`: strict equality (no type conversion) -- `!==`: strict inequality (no type conversion) - -#### Unions - -Conditions can also be applied with `OR` logic, returning the union of records that match either condition. This can be specified by using the `|` operator instead of `&`. 
For example, to return any product a rating of `5` _or_ a `featured` attribute that is `true`, we could write: - -```http -GET /Product/?rating=5|featured=true -``` - -#### Grouping of Operators - -Multiple conditions with different operators can be combined with grouping of conditions to indicate the order of operation. Grouping conditions can be done with parenthesis, with standard grouping conventions as used in query and mathematical expressions. For example, a query to find products with a rating of 5 OR a price between 100 and 200 could be written: - -```http -GET /Product/?rating=5|(price=gt=100&price=lt=200) -``` - -Grouping conditions can also be done with square brackets, which function the same as parenthesis for grouping conditions. The advantage of using square brackets is that you can include user provided values that might have parenthesis in them, and use standard URI component encoding functionality, which will safely escape/encode square brackets, but not parenthesis. For example, if we were constructing a query for products with a rating of a 5 and matching one of a set of user provided tags, a query could be built like: - -```http -GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient] -``` - -And the tags could be safely generated from user inputs in a tag array like: - -```javascript -let url = `/Product/?rating=5[${tags.map(encodeURIComponent).join('|')}]`; -``` - -More complex queries can be created by further nesting groups: - -```http -GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true] -``` - -### Query Calls - -Harper has several special query functions that use "call" syntax. These can be included in the query string as its own query entry (separated from other query conditions with an `&`). These include: - -#### `select(properties)` - -This function allows you to specify which properties should be included in the responses. 
This takes several forms: - -- `?select(property)`: This will return the values of the specified property directly in the response (will not be put in an object). -- `?select(property1,property2)`: This returns the records as objects, but limited to the specified properties. -- `?select([property1,property2,...])`: This returns the records as arrays of the property values in the specified properties. -- `?select(property1,)`: This can be used to specify that objects should be returned with the single specified property. -- `?select(property{subProperty1,subProperty2{subSubProperty,..}},...)`: This can be used to specify which sub-properties should be included in nested objects and joined/references records. - -To get a list of product names with a category of software: - -```http -GET /Product/?category=software&select(name) -``` - -#### `limit(start,end)` or `limit(end)` - -This function specifies a limit on the number of records returned, optionally providing a starting offset. - -For example, to find the first twenty records with a `rating` greater than 3, `inStock` equal to true, only returning the `rating` and `name` properties, you could use: - -```http -GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) -``` - -#### `sort(property)`, `sort(+property,-property,...)` - -This function allows you to indicate the sort order for the returned results. The argument for `sort()` is one or more properties that should be used to sort. If the property is prefixed with '+' or no prefix, the sort will be performed in ascending order by the indicated attribute/property. If the property is prefixed with '-', it will be sorted in descending order. If the multiple properties are specified, the sort will be performed on the first property, and for records with the same value for that property, the next property will be used to break the tie and sort results. This tie breaking will continue through any provided properties. 
- -For example, to sort by product name (in ascending order): - -```http -GET /Product?rating=gt=3&sort(+name) -``` - -To sort by rating in ascending order, then by price in descending order for products with the same rating: - -```http -GET /Product?sort(+rating,-price) -``` - -## Relationships - -Harper supports relationships in its data models, allowing for tables to define a relationship with data from other tables (or even itself) through foreign keys. These relationships can be one-to-many, many-to-one, or many-to-many (and even with ordered relationships). These relationships are defined in the schema, and then can easily be queried through chained attributes that act as "join" queries, allowing related attributes to referenced in conditions and selected for returned results. - -### Chained Attributes and Joins - -To support relationships and hierarchical data structures, in addition to querying on top-level attributes, you can also query on chained attributes. Most importantly, this provides Harper's "join" functionality, allowing related tables to be queried and joined in the results. Chained properties are specified by using dot syntax. In order to effectively leverage join functionality, you need to define a relationship in your schema: - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - brandId: ID @indexed - brand: Brand @relationship(from: "brandId") -} -type Brand @table @export { - id: ID @primaryKey - name: String - products: [Product] @relationship(to: "brandId") -} -``` - -And then you could query a product by brand name: - -```http -GET /Product/?brand.name=Microsoft -``` - -This will query for products for which the `brandId` references a `Brand` record with a `name` of `"Microsoft"`. - -The `brand` attribute in `Product` is a "computed" attribute from the foreign key (`brandId`), for the many-to-one relationship to the `Brand`. 
In the schema above, we also defined the reverse one-to-many relationship from a `Brand` to a `Product`, and we could likewise query that: - -```http -GET /Brand/?products.name=Keyboard -``` - -This would return any `Brand` with at least one product with a name `"Keyboard"`. Note, that both of these queries are effectively acting as an "INNER JOIN". - -#### Chained/Nested Select - -Computed relationship attributes are not included by default in query results. However, we can include them by specifying them in a select: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand) -``` - -We can also do a "nested" select and specify which sub-attributes to include. For example, if we only wanted to include the name property from the brand, we could do so: - -```http -GET /Product/?brand.name=Microsoft&select(name,brand{name}) -``` - -Or to specify multiple sub-attributes, we can comma delimit them. Note that selects can "join" to another table without any constraint/filter on the related/joined table: - -```http -GET /Product/?name=Keyboard&select(name,brand{name,id}) -``` - -When selecting properties from a related table without any constraints on the related table, this effectively acts like a "LEFT JOIN" and will omit the `brand` property if the brandId is `null` or references a non-existent brand. - -#### Many-to-many Relationships (Array of Foreign Keys) - -Many-to-many relationships are also supported, and can easily be created using an array of foreign key values, without requiring the traditional use of a junction table. This can be done by simply creating a relationship on an array-typed property that references a local array of foreign keys. 
For example, we could create a relationship to the resellers of a product (each product can have multiple resellers, and each reseller can carry multiple products): - -```graphql -type Product @table @export { - id: ID @primaryKey - name: String - resellerIds: [ID] @indexed - resellers: [Reseller] @relationship(from: "resellerIds") -} -type Reseller @table { - id: ID @primaryKey - name: String - ... -} -``` - -The product record can then hold an array of the reseller ids. When the `resellers` property is accessed (either through code or through select, conditions), the array of ids is resolved to an array of reseller records. We can also query through the resellers relationship like with the other relationships. For example, to query the products that are available through the "Cool Shop": - -```http -GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id}) -``` - -One of the benefits of using an array of foreign key values is that this can be manipulated using standard array methods (in JavaScript), and the array can dictate an order to keys and therefore to the resulting records. For example, you may wish to define a specific order to the resellers and how they are listed (which comes first, last): - -```http -PUT /Product/123 -Content-Type: application/json - -{ "id": "123", "resellerIds": ["first-reseller-id", "second-reseller-id", "last-reseller-id"], -...} -``` - -#### Type Conversion - -Query parameters are simply text, so there are several features for converting parameter values to properly typed values for performing correct searches. For the FIQL comparators, which includes `==`, `!=`, `=gt=`, `=lt=`, `=ge=`, `=le=`, the parser will perform type conversion, according to the following rules: - -- `name==null`: Will convert the value to `null` for searching. -- `name==123`: Will convert the value to a number _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`).
-- `name==true`: Will convert the value to a boolean _if_ the attribute is untyped (there is no type specified in a GraphQL schema, or the type is specified to be `Any`). -- `name==number:123`: Will explicitly convert the value after "number:" to a number. -- `name==boolean:true`: Will explicitly convert the value after "boolean:" to a boolean. -- `name==string:some%20text`: Will explicitly keep the value after "string:" as a string (and perform URL component decoding) -- `name==date:2024-01-05T20%3A07%3A27.955Z`: Will explicitly convert the value after "date:" to a Date object. - -If the attribute specifies a type (like `Float`) in the schema definition, the value will always be converted to the specified type before searching. - -For "strict" operators, which includes `=`, `===`, and `!==`, no automatic type conversion will be applied, the value will be decoded as string with URL component decoding, and have type conversion applied if the attribute specifies a type, in which case the attribute type will specify the type conversion. - -#### Content Types and Negotiation - -HTTP defines a couple of headers for indicating the (preferred) content type of the request and response. The `Content-Type` request header can be used to specify the content type of the request body (for PUT, PATCH, and POST). The `Accept` request header indicates the preferred content type of the response. For general records with object structures, Harper supports the following content types: `application/json` - Common format, easy to read, with great tooling support. `application/cbor` - Recommended binary format for optimal encoding efficiency and performance. `application/x-msgpack` - This is also an efficient format, but CBOR is preferable, as it has better streaming capabilities and faster time-to-first-byte. `text/csv` - CSV, lacks explicit typing, not well suited for heterogeneous data structures, but good for moving data to and from a spreadsheet. 
- -CBOR is generally the most efficient and powerful encoding format, with the best performance, most compact encoding, and most expansive ability to encode different data types like Dates, Maps, and Sets. MessagePack is very similar and tends to have broader adoption. However, JSON can be easier to work with and may have better tooling. Also, if you are using compression for data transfer (gzip or brotli), JSON will often result in more compact compressed data due to character frequencies that better align with Huffman coding, making JSON a good choice for web applications that do not require specific data types beyond the standard JSON types. - -Requesting a specific content type can also be done in a URL by suffixing the path with the extension for the content type. If you want to retrieve a record in CSV format, you could request: - -```http -GET /product/some-id.csv -``` - -Or you could request a query response in MessagePack: - -```http -GET /product/.msgpack?category=software -``` - -However, generally it is not recommended that you use extensions in paths and it is best practice to use the `Accept` header to specify acceptable content types. - -#### Specific Content Objects - -You can specify other content types, and the data will be stored as a record or object that holds the type and contents of the data. For example, if you do: - -``` -PUT /my-resource/33 -Content-Type: text/calendar - -BEGIN:VCALENDAR -VERSION:2.0 -... -``` - -This would store a record equivalent to JSON: - -``` -{ "contentType": "text/calendar", "data": "BEGIN:VCALENDAR\nVERSION:2.0\n..." } -``` - -Retrieving a record with `contentType` and `data` properties will likewise return a response with the specified `Content-Type` and body. If the `Content-Type` is not of the `text` family, the data will be treated as binary data (a Node.js `Buffer`). - -You can also use `application/octet-stream` to indicate that the request body should be preserved in binary form.
This is also useful for uploading to a specific property: - -``` -PUT /my-resource/33/image -Content-Type: image/gif - -...image data... -``` diff --git a/versioned_docs/version-4.7/developers/security/basic-auth.md b/versioned_docs/version-4.7/developers/security/basic-auth.md deleted file mode 100644 index 22361432..00000000 --- a/versioned_docs/version-4.7/developers/security/basic-auth.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Basic Authentication ---- - -# Basic Authentication - -Harper uses Basic Auth and JSON Web Tokens (JWTs) to secure our HTTP requests. In the context of an HTTP transaction, **basic access authentication** is a method for an HTTP user agent to provide a username and password when making a request. - -**You do not need to log in separately. Basic Auth is added to each HTTP request like create_database, create_table, insert etc… via headers.** - -A header is added to each HTTP request. The header key is `Authorization`, and the header value is `Basic <base64-encoded username:password>`. - -## Authentication in Harper Studio - -In the below code sample, you can see where we add the authorization header to the request. This needs to be added for each and every HTTP request for Harper. - -_Note: This function uses btoa.
Learn about_ [_btoa here_](https://developer.mozilla.org/en-US/docs/Web/API/btoa)_._ - -```javascript -function callHarperDB(call_object, operation, callback) { - const options = { - method: 'POST', - hostname: call_object.endpoint_url, - port: call_object.endpoint_port, - path: '/', - headers: { - 'content-type': 'application/json', - 'authorization': 'Basic ' + btoa(call_object.username + ':' + call_object.password), - 'cache-control': 'no-cache', - }, - }; - - const http_req = http.request(options, function (hdb_res) { - let chunks = []; - - hdb_res.on('data', function (chunk) { - chunks.push(chunk); - }); - - hdb_res.on('end', function () { - const body = Buffer.concat(chunks); - if (isJson(body)) { - return callback(null, JSON.parse(body)); - } else { - return callback(body, null); - } - }); - }); - - http_req.on('error', function (chunk) { - return callback('Failed to connect', null); - }); - - http_req.write(JSON.stringify(operation)); - http_req.end(); -} -``` diff --git a/versioned_docs/version-4.7/developers/security/certificate-management.md b/versioned_docs/version-4.7/developers/security/certificate-management.md deleted file mode 100644 index 5fc6cb2c..00000000 --- a/versioned_docs/version-4.7/developers/security/certificate-management.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -This document is information on managing certificates for Harper external facing APIs. For information on certificate management for clustering see [clustering certificate management](../../reference/clustering/certificate-management). - -## Development - -An out of the box install of Harper does not have HTTPS enabled (see [configuration](../../deployments/configuration#http) for relevant configuration file settings.) This is great for local development. If you are developing using a remote server and your requests are traversing the Internet, we recommend that you enable HTTPS. 
- -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -By default Harper will generate certificates and place them at `/keys/`. These certificates will not have a valid Common Name (CN) for your Harper node, so you will be able to use HTTPS, but your HTTPS client must be configured to accept the invalid certificate. - -## Production - -For production deployments, in addition to using HTTPS, we recommend using your own certificate authority (CA) or a public CA such as Let's Encrypt, to generate certificates with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. - -We have a few recommended options for enabling HTTPS in a production setting. - -### Option: Enable Harper HTTPS and Replace Certificates - -To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` to the port you wish to use for HTTPS connections and restart Harper. - -To replace the certificates, either replace the contents of the existing certificate files at `/keys/`, or update the Harper configuration with the path of your new certificate files, and then restart Harper. - -```yaml -tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -`operationsApi.tls` configuration is optional. If it is not set Harper will default to the values in the `tls` section. - -```yaml -operationsApi: - tls: - certificate: ~/hdb/keys/certificate.pem - privateKey: ~/hdb/keys/privateKey.pem -``` - -### mTLS - -Mutual TLS (mTLS) is a security protocol that requires both the client and the server to present certificates to each other. Requiring a client certificate can be useful for authenticating clients and ensuring that only authorized clients can access your Harper instance. This can be enabled by setting the `http.mtls` configuration in `harperdb-config.yaml` to `true` and providing a certificate authority in the TLS section: - -```yaml - -http: - mtls: true - ... 
-tls: - certificateAuthority: ~/hdb/keys/ca.pem - ... -``` - -### Certificate Revocation Checking - -When using mTLS, you may also want to enable certificate revocation checking to ensure that revoked certificates cannot be used for authentication, even if they're still within their validity period. Harper supports two industry-standard methods for checking certificate revocation status: - -**CRL (Certificate Revocation List)** - -- A digitally signed list of revoked certificates published by the Certificate Authority -- Downloaded and cached locally for fast verification -- Updated periodically (typically daily) -- Best for: High-volume verification, offline scenarios, predictable bandwidth usage - -**OCSP (Online Certificate Status Protocol)** - -- Real-time query to check individual certificate status -- Provides immediate revocation status -- Requires network connection for each check (with caching) -- Best for: Real-time revocation status, certificates without CRL distribution points - -**Harper's Approach: CRL-First with OCSP Fallback** - -Harper uses a CRL-first strategy for optimal performance: - -1. Checks CRL if available (fast, cached locally for 24 hours by default) -2. Falls back to OCSP if CRL is not available or fails (cached for 1 hour by default) -3. Applies the configured failure mode if both methods fail - -This strategy provides the best balance of performance, reliability, and security. 
- -**Enabling Certificate Verification** - -Certificate revocation checking is disabled by default and must be explicitly enabled: - -```yaml -http: - mtls: - required: true - certificateVerification: true # Enable with defaults -``` - -For production environments with high-security requirements, you can customize the verification settings: - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Reject connections on verification failure - crl: - timeout: 15000 # 15 seconds to download CRL - cacheTtl: 43200000 # Cache for 12 hours - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache for 2 hours -``` - -**Performance Considerations** - -- **CRL caching**: CRLs are cached locally, so subsequent verifications are very fast (no network requests) -- **OCSP caching**: Successful OCSP responses are cached (1 hour by default), errors cached for 5 minutes -- **Background refresh**: CRLs are refreshed in the background before expiration to avoid blocking requests -- **Graceful degradation**: Network failures don't block connections in fail-open mode - -**When to Use Certificate Verification** - -Enable certificate revocation checking when: - -- You need to immediately revoke access for compromised certificates -- Compliance or security policies require revocation checking -- You're in a zero-trust security environment -- Client certificates have long validity periods - -You may skip it if: - -- All certificates have very short validity periods (e.g., < 24 hours) -- You have alternative revocation mechanisms in place -- Performance is critical and risk is acceptable - -For detailed configuration options, see the [configuration reference](../../deployments/configuration#http). - -### Option: Nginx Reverse Proxy - -Instead of enabling HTTPS for Harper, Nginx can be used as a reverse proxy for Harper. 
- -Install Nginx, configure Nginx to use certificates issued from your own CA or a public CA, then configure Nginx to listen for HTTPS requests and forward to Harper as HTTP requests. - -[Certbot](https://certbot.eff.org/) is a great tool for automatically requesting and renewing Let’s Encrypt certificates used by Nginx. - -### Option: External Reverse Proxy - -Instead of enabling HTTPS for Harper, a number of different external services can be used as a reverse proxy for Harper. These services typically have integrated certificate management. Configure the service to listen for HTTPS requests and forward (over a private network) to Harper as HTTP requests. - -Examples of these types of services include an AWS Application Load Balancer or a GCP external HTTP(S) load balancer. - -### Additional Considerations - -It is possible to use different certificates for the Operations API and the Custom Functions API. In scenarios where only your Custom Functions endpoints need to be exposed to the Internet and the Operations API is reserved for Harper administration, you may want to use a private CA to issue certificates for the Operations API and a public CA for the Custom Functions API certificates. diff --git a/versioned_docs/version-4.7/developers/security/certificate-verification.md b/versioned_docs/version-4.7/developers/security/certificate-verification.md deleted file mode 100644 index dd7360ec..00000000 --- a/versioned_docs/version-4.7/developers/security/certificate-verification.md +++ /dev/null @@ -1,502 +0,0 @@ ---- -title: Certificate Verification ---- - -# Certificate Verification - -Certificate verification (also called certificate revocation checking) is a security feature that ensures revoked certificates cannot be used for authentication, even if they are otherwise valid and trusted. 
This is a critical security control for environments where certificates may need to be revoked before their expiration date due to compromise, employee departure, or other security concerns. - -## Overview - -When a client presents a certificate for mTLS authentication, Harper performs the following checks: - -1. **Certificate Validation** (always performed by Node.js TLS): - - Certificate signature is valid - - Certificate is issued by a trusted CA - - Certificate is within its validity period - - Certificate chain is properly formed - -2. **Certificate Revocation Checking** (optional, must be explicitly enabled): - - Certificate has not been revoked by the issuing CA - - Uses CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol) - -## Revocation Checking Methods - -Harper supports two industry-standard methods for checking certificate revocation status: - -### CRL (Certificate Revocation List) - -A CRL is a digitally signed list of revoked certificates published by a Certificate Authority. - -**Advantages:** - -- Fast verification (cached locally) -- Works offline once downloaded -- Predictable bandwidth usage -- Good for high-volume verification -- No privacy concerns (no per-certificate queries) - -**How it works:** - -1. Harper downloads the CRL from the distribution point specified in the certificate -2. CRL is cached locally (24 hours by default) -3. Subsequent verifications check the cached CRL (very fast, no network requests) -4. CRL is refreshed in the background before expiration - -**Configuration:** - -```yaml -http: - mtls: - certificateVerification: - crl: - timeout: 10000 # 10 seconds to download CRL - cacheTtl: 86400000 # Cache for 24 hours - gracePeriod: 86400000 # 24 hour grace period after nextUpdate - failureMode: fail-closed # Reject on CRL check failure -``` - -### OCSP (Online Certificate Status Protocol) - -OCSP provides real-time certificate status checking by querying the CA's OCSP responder. 
- -**Advantages:** - -- Real-time revocation status -- Smaller response size than CRL -- Good for certificates without CRL distribution points -- Works when CRL is unavailable - -**How it works:** - -1. Harper sends a request to the OCSP responder specified in the certificate -2. OCSP responder returns the current status (good, revoked, or unknown) -3. Response is cached (1 hour by default for success, 5 minutes for errors) - -**Configuration:** - -```yaml -http: - mtls: - certificateVerification: - ocsp: - timeout: 5000 # 5 seconds for OCSP response - cacheTtl: 3600000 # Cache successful responses for 1 hour - errorCacheTtl: 300000 # Cache errors for 5 minutes - failureMode: fail-closed # Reject on OCSP check failure -``` - -## Verification Strategy - -Harper uses a **CRL-first strategy with OCSP fallback** for optimal performance and reliability: - -1. **Check CRL** if available - - Fast (uses cached CRL) - - No network request needed if CRL is cached - - If CRL check succeeds or fails definitively, return result - -2. **Fall back to OCSP** if: - - Certificate has no CRL distribution point - - CRL download fails - - CRL is expired and cannot be refreshed - -3. 
**Apply failure mode** if both methods fail - -This strategy provides the best balance of: - -- **Performance**: CRL checks are very fast when cached -- **Reliability**: OCSP provides fallback when CRL is unavailable -- **Security**: Always attempts verification before falling back - -## Configuration - -### Enable with Defaults - -The simplest configuration enables certificate verification with sensible defaults: - -```yaml -http: - mtls: - required: true - certificateVerification: true -``` - -This enables: - -- CRL checking (enabled, 10s timeout, 24h cache) -- OCSP checking (enabled, 5s timeout, 1h cache) -- Fail-closed mode (rejects connections on verification failure) - -### Custom Configuration - -For production environments, you may want to customize settings: - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Global setting - crl: - timeout: 15000 # 15 seconds for CRL download - cacheTtl: 43200000 # Cache CRLs for 12 hours - gracePeriod: 86400000 # 24 hour grace period - failureMode: fail-closed # CRL-specific setting - ocsp: - timeout: 8000 # 8 seconds for OCSP response - cacheTtl: 7200000 # Cache results for 2 hours - errorCacheTtl: 600000 # Cache errors for 10 minutes - failureMode: fail-closed # OCSP-specific setting -``` - -### CRL Only (No OCSP) - -For environments where OCSP is not available or desired: - -```yaml -http: - mtls: - certificateVerification: - ocsp: false # Disable OCSP, CRL remains enabled -``` - -### OCSP Only (No CRL) - -For environments preferring real-time checking: - -```yaml -http: - mtls: - certificateVerification: - crl: false # Disable CRL, OCSP remains enabled -``` - -### Environment Variables - -All settings can be configured via environment variables: - -```bash -# Enable certificate verification -HTTP_MTLS_CERTIFICATEVERIFICATION=true - -# Global failure mode -HTTP_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed - -# CRL settings 
-HTTP_MTLS_CERTIFICATEVERIFICATION_CRL=true -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_CACHETTL=43200000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_GRACEPERIOD=86400000 -HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_FAILUREMODE=fail-closed - -# OCSP settings -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP=true -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_TIMEOUT=8000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_CACHETTL=7200000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_ERRORCACHETTL=600000 -HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_FAILUREMODE=fail-closed -``` - -For replication servers, use the `REPLICATION_` prefix instead of `HTTP_`. - -## Failure Modes - -Certificate verification supports two failure modes that control behavior when verification cannot be completed: - -### fail-closed (Recommended) - -**Default behavior.** Rejects connections when verification fails due to network errors, timeouts, or other operational issues. - -**Use when:** - -- Security is paramount -- You can tolerate false positives (rejecting valid certificates) -- Your CA infrastructure is highly available -- You're in a zero-trust environment - -**Example:** - -```yaml -certificateVerification: - failureMode: fail-closed -``` - -### fail-open - -Allows connections when verification fails, but logs a warning. The connection is still rejected if the certificate is explicitly found to be revoked. - -**Use when:** - -- Availability is more important than perfect security -- Your CA infrastructure may be intermittently unavailable -- You have other compensating controls -- You're gradually rolling out certificate verification - -**Example:** - -```yaml -certificateVerification: - failureMode: fail-open -``` - -**Important:** Invalid signatures on CRLs always result in rejection regardless of failure mode, as this indicates potential tampering. 
- -## Performance Considerations - -### CRL Performance - -- **First verification**: Downloads CRL (10s timeout by default) -- **Subsequent verifications**: Instant (reads from cache) -- **Background refresh**: CRL is refreshed before expiration without blocking requests -- **Memory usage**: ~10-100KB per CRL depending on size -- **Network usage**: One download per CRL per cacheTtl period - -### OCSP Performance - -- **First verification**: OCSP query (5s timeout by default) -- **Subsequent verifications**: Reads from cache (1 hour default) -- **Memory usage**: Minimal (~1KB per cached response) -- **Network usage**: One query per unique certificate per cacheTtl period - -### Optimization Tips - -1. **Increase CRL cache TTL** for stable environments: - - ```yaml - crl: - cacheTtl: 172800000 # 48 hours - ``` - -2. **Increase OCSP cache TTL** for long-lived connections: - - ```yaml - ocsp: - cacheTtl: 7200000 # 2 hours - ``` - -3. **Use CRL only** if you control the CA and **all certificates have CRL distribution points**: - - ```yaml - ocsp: false # Only disable if all certs have CRL URLs - ``` - -4. 
**Reduce grace period** if you need tighter revocation enforcement: - ```yaml - crl: - gracePeriod: 0 # No grace period - ``` - -## Production Best Practices - -### High-Security Environments - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-closed # Always reject on failure - crl: - timeout: 15000 # Longer timeout for reliability - cacheTtl: 43200000 # 12 hours (balance security and performance) - gracePeriod: 0 # No grace period for strict enforcement - ocsp: - timeout: 8000 - cacheTtl: 3600000 # 1 hour -``` - -### High-Availability Environments - -```yaml -http: - mtls: - required: true - certificateVerification: - failureMode: fail-open # Prioritize availability - crl: - timeout: 5000 # Shorter timeout to fail faster - cacheTtl: 86400000 # 24 hours - gracePeriod: 86400000 # 24 hour grace period - ocsp: - timeout: 3000 - cacheTtl: 7200000 # 2 hours for fewer queries -``` - -### Performance-Critical Environments - -For maximum performance, increase cache durations to minimize network requests: - -```yaml -http: - mtls: - required: true - certificateVerification: - crl: - cacheTtl: 172800000 # 48 hours (minimize CRL downloads) - gracePeriod: 86400000 # 24 hour grace period - ocsp: - cacheTtl: 7200000 # 2 hours (minimize OCSP queries) - errorCacheTtl: 600000 # Cache errors for 10 minutes -``` - -**Note**: Only disable OCSP (`ocsp: false`) if you're certain all client certificates have CRL distribution points. Otherwise, certificates without CRLs won't be checked for revocation. - -## Troubleshooting - -### Connection Rejected: Certificate Verification Failed - -**Cause:** Certificate was found to be revoked or verification failed in fail-closed mode. - -**Solutions:** - -1. Check if certificate is actually revoked in the CRL or OCSP responder -2. Verify CA infrastructure is accessible -3. Check timeout settings (may need to increase) -4. 
Temporarily use fail-open mode while investigating: - ```yaml - certificateVerification: - failureMode: fail-open - ``` - -### High Latency on First Connection - -**Cause:** CRL is being downloaded for the first time. - -**Solutions:** - -1. This is normal and only happens once per CRL per cacheTtl period -2. Subsequent connections will be fast (cached CRL) -3. Increase CRL timeout if downloads are slow: - ```yaml - crl: - timeout: 20000 # 20 seconds - ``` - -### Frequent CRL Downloads - -**Cause:** CRL cacheTtl is too short or CRL nextUpdate period is very short. - -**Solutions:** - -1. Increase cacheTtl: - ```yaml - crl: - cacheTtl: 172800000 # 48 hours - ``` -2. Increase gracePeriod to allow using slightly expired CRLs: - ```yaml - crl: - gracePeriod: 172800000 # 48 hours - ``` - -### OCSP Responder Unavailable - -**Cause:** OCSP responder is down or unreachable. - -**Solutions:** - -1. CRL will be used as fallback automatically -2. Use fail-open mode to allow connections: - ```yaml - ocsp: - failureMode: fail-open - ``` -3. Disable OCSP and rely on CRL only (ensure all certs have CRL URLs): - ```yaml - ocsp: false - ``` - -### Network/Firewall Blocking Outbound Requests - -**Cause:** Secure hosting environments often restrict outbound HTTP/HTTPS traffic to reduce exfiltration risks. This prevents Harper from reaching CRL distribution points and OCSP responders. - -**Symptoms:** - -- Certificate verification timeouts in fail-closed mode -- Logs show connection failures to CRL/OCSP URLs -- First connection succeeds (no cached CRL), subsequent fail after cache expires - -**Solutions:** - -1. **Allow outbound traffic to CA infrastructure** (recommended): - - Whitelist CRL distribution point URLs (from your certificates) - - Whitelist OCSP responder URLs (from your certificates) - - Example: If using Let's Encrypt, allow `http://x1.c.lencr.org/` and `http://ocsp.int-x3.letsencrypt.org/` - -2. 
**Use fail-open mode** (allows connections when verification fails): - - ```yaml - certificateVerification: - failureMode: fail-open # Don't block on network issues - ``` - -3. **Use CRL only with local caching/proxy**: - - Set up an internal CRL mirror/proxy - - Configure firewall to allow Harper → internal CRL proxy - - Increase cache TTL to reduce fetch frequency: - ```yaml - certificateVerification: - crl: - cacheTtl: 172800000 # 48 hours - ocsp: false # Disable OCSP - ``` - -4. **Disable verification** (if you have alternative security controls): - ```yaml - certificateVerification: false - ``` - -## Security Considerations - -### When Certificate Verification is Critical - -Enable certificate verification when: - -- Certificates have long validity periods (> 1 day) -- You need immediate revocation capability -- Compliance requires revocation checking (PCI DSS, HIPAA, etc.) -- You're in a zero-trust security model -- Client certificates are used for API authentication - -### When You Might Skip It - -Consider not using certificate verification when: - -- Certificates have very short validity periods (< 24 hours) -- You rotate certificates automatically (e.g., with cert-manager) -- You have alternative revocation mechanisms -- Performance is critical and risk is acceptable -- Your CA doesn't publish CRLs or support OCSP - -### Defense in Depth - -Certificate verification is one layer of security. Also consider: - -- Short certificate validity periods (reduces window of compromise) -- Certificate pinning (prevents CA compromise) -- Network segmentation (limits blast radius) -- Access logging and monitoring -- Regular certificate rotation - -## Replication Server - -Certificate verification works identically for replication servers. 
Use the `replication.mtls` configuration: - -```yaml -replication: - hostname: server-one - routes: - - server-two - mtls: - certificateVerification: true -``` - -**Important:** mTLS is always required for replication and cannot be disabled. This configuration only controls whether certificate revocation checking is performed. - -For complete replication configuration, see [Configuration - Replication](../../deployments/configuration#replication). - -## Further Reading - -- [Certificate Management](./certificate-management) - Managing certificates and CAs -- [mTLS Authentication](./mtls-auth) - Setting up mTLS -- [Configuration Reference](../../deployments/configuration) - Complete configuration options diff --git a/versioned_docs/version-4.7/developers/security/configuration.md b/versioned_docs/version-4.7/developers/security/configuration.md deleted file mode 100644 index 2dee9d86..00000000 --- a/versioned_docs/version-4.7/developers/security/configuration.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Configuration ---- - -# Configuration - -Harper was set up to require very minimal configuration to work out of the box. There are, however, some best practices we encourage for anyone building an app with Harper. - -## CORS - -Harper allows for managing [cross-origin HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS). By default, Harper enables CORS for all domains if you need to disable CORS completely or set up an access list of domains you can do the following: - -1. Open the harperdb-config.yaml file, which can be found in \, the location you specified during install. -1. In harperdb-config.yaml there should be 2 entries under `operationsApi.network`: cors and corsAccessList. - - `cors` - 1. To turn off, change to: `cors: false` - 1. To turn on, change to: `cors: true` - - `corsAccessList` - 1. The `corsAccessList` will only be recognized by the system when `cors` is `true` - 1. 
To create an access list you set `corsAccessList` to a comma-separated list of domains. - - i.e. `corsAccessList` is `https://harpersystems.dev,https://products.harpersystems.dev` - - 1. To clear out the access list and allow all domains: `corsAccessList` is `[null]` - -## SSL - -Harper provides the option to use an HTTP or HTTPS and HTTP/2 interface. The default port for the server is 9925. - -These default ports can be changed by updating the `operationsApi.network.port` value in `/harperdb-config.yaml` - -By default, HTTPS is turned off and HTTP is turned on. It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. HTTP is intended for local or private network use. - -You can toggle HTTPS and HTTP in the settings file. By setting `operationsApi.network.https` to true/false. When `https` is set to `false`, the server will use HTTP (version 1.1). Enabling HTTPS will enable both HTTPS/1.1 and HTTPS/2. - -Harper automatically generates a certificate (certificate.pem), a certificate authority (ca.pem) and a private key file (privateKey.pem) which live at `/keys/`. - -You can replace these with your own certificates and key. - -**Changes to these settings require a restart. Use operation `harperdb restart` from Harper Operations API.** diff --git a/versioned_docs/version-4.7/developers/security/index.md b/versioned_docs/version-4.7/developers/security/index.md deleted file mode 100644 index a090aa88..00000000 --- a/versioned_docs/version-4.7/developers/security/index.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Security ---- - -# Security - -Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they're supposed to be able to access. 
Our granular permissions allow for unparalleled flexibility and control, and can actually lower the total cost of ownership compared to other database solutions, since you no longer have to replicate subsets of your data to isolate use cases. - -## Authentication - -- [JWT Authentication](security/jwt-auth) - Token-based authentication using JSON Web Tokens -- [Basic Authentication](security/basic-auth) - Username and password authentication -- [mTLS Authentication](security/mtls-auth) - Mutual TLS certificate-based authentication - -## Certificate Management - -- [Certificate Management](security/certificate-management) - Managing certificates and Certificate Authorities -- [Certificate Verification](security/certificate-verification) - Certificate revocation checking (CRL/OCSP) - -## Access Control - -- [Configuration](security/configuration) - Security configuration and settings -- [Users and Roles](security/users-and-roles) - Role-based access control and permissions diff --git a/versioned_docs/version-4.7/developers/security/jwt-auth.md b/versioned_docs/version-4.7/developers/security/jwt-auth.md deleted file mode 100644 index 832373e4..00000000 --- a/versioned_docs/version-4.7/developers/security/jwt-auth.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: JWT Authentication ---- - -# JWT Authentication - -Harper uses token based authentication with JSON Web Tokens, JWTs. - -This consists of two primary operations `create_authentication_tokens` and `refresh_operation_token`. These generate two types of tokens, as follows: - -- The `operation_token` which is used to authenticate all Harper operations in the Bearer Token Authorization Header. The default expiry is one day. -- The `refresh_token` which is used to generate a new `operation_token` upon expiry. This token is used in the Bearer Token Authorization Header for the `refresh_operation_token` operation only. The default expiry is thirty days. 
- -The `create_authentication_tokens` operation can be used at any time to refresh both tokens in the event that both have expired or been lost. - -## Create Authentication Tokens - -Users must initially create tokens using their Harper credentials. The following POST body is sent to Harper. No headers are required for this POST operation. - -```json -{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -} -``` - -A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "operation": "create_authentication_tokens", - "username": "username", - "password": "password" -}' -``` - -An example expected return object is: - -```json -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4", - "refresh_token": 
"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60" -} -``` - -## Using JWT Authentication Tokens - -The `operation_token` value is used to authenticate all operations in place of our standard Basic auth. In order to pass the token you will need to create an Bearer Token Authorization Header like the following request: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer 
eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDUwNjQ2MDAsInN1YiI6Im9wZXJhdGlvbiJ9.MpQA-9CMjA-mn-7mHyUXSuSC_-kqMqJXp_NDiKLFtbtMRbodCuY3DzH401rvy_4vb0yCELf0B5EapLVY1545sv80nxSl6FoZFxQaDWYXycoia6zHpiveR8hKlmA6_XTWHJbY2FM1HAFrdtt3yUTiF-ylkdNbPG7u7fRjTmHfsZ78gd2MNWIDkHoqWuFxIyqk8XydQpsjULf2Uacirt9FmHfkMZ-Jr_rRpcIEW0FZyLInbm6uxLfseFt87wA0TbZ0ofImjAuaW_3mYs-3H48CxP152UJ0jByPb0kHsk1QKP7YHWx1-Wce9NgNADfG5rfgMHANL85zvkv8sJmIGZIoSpMuU3CIqD2rgYnMY-L5dQN1fgfROrPMuAtlYCRK7r-IpjvMDQtRmCiNG45nGsM4DTzsa5GyDrkGssd5OBhl9gr9z9Bb5HQVYhSKIOiy72dK5dQNBklD4eGLMmo-u322zBITmE0lKaBcwYGJw2mmkYcrjDOmsDseU6Bf_zVUd9WF3FqwNkhg4D7nrfNSC_flalkxPHckU5EC_79cqoUIX2ogufBW5XgYbU4WfLloKcIpb51YTZlZfwBHlHPSyaq_guaXFaeCUXKq39_i1n0HRF_mRaxNru0cNDFT9Fm3eD7V8axFijSVAMDyQs_JR7SY483YDKUfN4l-vw-EVynImr4' \ ---data-raw '{ - "operation":"search_by_hash", - "schema":"dev", - "table":"dog", - "hash_values":[1], - "get_attributes": ["*"] -}' -``` - -## Token Expiration - -`operation_token` expires at a set interval. Once it expires it will no longer be accepted by Harper. This duration defaults to one day, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token`, the `refresh_operation_token` operation is used, passing the `refresh_token` in the Bearer Token Authorization Header. 
A full cURL example can be seen here: - -```bash -curl --location --request POST 'http://localhost:9925' \ ---header 'Content-Type: application/json' \ ---header 'Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InVzZXJuYW1lIiwiaWF0IjoxNjA0OTc4MjAwLCJleHAiOjE2MDc1NzAyMDAsInN1YiI6InJlZnJlc2gifQ.acaCsk-CJWIMLGDZdGnsthyZsJfQ8ihXLyE8mTji8PgGkpbwhs7e1O0uitMgP_pGjHq2tey1BHSwoeCL49b18WyMIB10hK-q2BXGKQkykltjTrQbg7VsdFi0h57mGfO0IqAwYd55_hzHZNnyJMh4b0iPQFDwU7iTD7x9doHhZAvzElpkWbc_NKVw5_Mw3znjntSzbuPN105zlp4Niurin-_5BnukwvoJWLEJ-ZlF6hE4wKhaMB1pWTJjMvJQJE8khTTvlUN8tGxmzoaDYoe1aCGNxmDEQnx8Y5gKzVd89sylhqi54d2nQrJ2-ElfEDsMoXpR01Ps6fNDFtLTuPTp7ixj8LvgL2nCjAg996Ga3PtdvXJAZPDYCqqvaBkZZcsiqOgqLV0vGo3VVlfrcgJXQImMYRr_Inu0FCe47A93IAWuQTs-KplM1KdGJsHSnNBV6oe6QEkROJT5qZME-8xhvBYvOXqp9Znwg39bmiBCMxk26Ce66_vw06MNgoa3D5AlXPWemfdVKPZDnj_aLVjZSs0gAfFElcVn7l9yjWJOaT2Muk26U8bJl-2BEq_DSclqKHODuYM5kkPKIdE4NFrsqsDYuGxcA25rlNETFyl0q-UXj1aoz_joy5Hdnr4mFELmjnoo4jYQuakufP9xeGPsj1skaodKl0mmoGcCD6v1F60' \ ---data-raw '{ - "operation":"refresh_operation_token" -}' -``` - -This will return a new `operation_token`. 
An example expected return object is: - -```bash -{ - "operation_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6eyJfX2NyZWF0ZWR0aW1lX18iOjE2MDQ5NzgxODkxNTEsIl9fdXBkYXRlZHRpbWVfXyI6MTYwNDk3ODE4OTE1MSwiYWN0aXZlIjp0cnVlLCJyb2xlIjp7Il9fY3JlYXRlZHRpbWVfXyI6MTYwNDk0NDE1MTM0NywiX191cGRhdGVkdGltZV9fIjoxNjA0OTQ0MTUxMzQ3LCJpZCI6IjdiNDNlNzM1LTkzYzctNDQzYi05NGY3LWQwMzY3Njg5NDc4YSIsInBlcm1pc3Npb24iOnsic3VwZXJfdXNlciI6dHJ1ZSwic3lzdGVtIjp7InRhYmxlcyI6eyJoZGJfdGFibGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9hdHRyaWJ1dGUiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9zY2hlbWEiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl91c2VyIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfcm9sZSI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2pvYiI6eyJyZWFkIjp0cnVlLCJpbnNlcnQiOmZhbHNlLCJ1cGRhdGUiOmZhbHNlLCJkZWxldGUiOmZhbHNlLCJhdHRyaWJ1dGVfcGVybWlzc2lvbnMiOltdfSwiaGRiX2xpY2Vuc2UiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl9pbmZvIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119LCJoZGJfbm9kZXMiOnsicmVhZCI6dHJ1ZSwiaW5zZXJ0IjpmYWxzZSwidXBkYXRlIjpmYWxzZSwiZGVsZXRlIjpmYWxzZSwiYXR0cmlidXRlX3Blcm1pc3Npb25zIjpbXX0sImhkYl90ZW1wIjp7InJlYWQiOnRydWUsImluc2VydCI6ZmFsc2UsInVwZGF0ZSI6ZmFsc2UsImRlbGV0ZSI6ZmFsc2UsImF0dHJpYnV0ZV9wZXJtaXNzaW9ucyI6W119fX19LCJyb2xlIjoic3VwZXJfdXNlciJ9LCJ1c2VybmFtZSI6InVzZXJuYW1lIn0sImlhdCI6MTYwNDk3ODcxMywiZXhwIjoxNjA1MDY1MTEzLCJzdWIiOiJvcGVyYXRpb24ifQ.qB4FS7fzryCO5epQlFCQe4mQcUEhzXjfsXRFPgauXrGZwSeSr2o2a1tE1xjiI3qjK0r3f2bdi2xpFlDR1thdY-m0mOpHTICNOae4Kd
Kzp7cyzRaOFurQnVYmkWjuV_Ww4PJgr6P3XDgXs5_B2d7ZVBR-BaAimYhVRIIShfpWk-4iN1XDk96TwloCkYx01BuN87o-VOvAnOG-K_EISA9RuEBpSkfUEuvHx8IU4VgfywdbhNMh6WXM0VP7ZzSpshgsS07MGjysGtZHNTVExEvFh14lyfjfqKjDoIJbo2msQwD2FvrTTb0iaQry1-Wwz9QJjVAUtid7tJuP8aBeNqvKyMIXRVnl5viFUr-Gs-Zl_WtyVvKlYWw0_rUn3ucmurK8tTy6iHyJ6XdUf4pYQebpEkIvi2rd__e_Z60V84MPvIYs6F_8CAy78aaYmUg5pihUEehIvGRj1RUZgdfaXElw90-m-M5hMOTI04LrzzVnBu7DcMYg4UC1W-WDrrj4zUq7y8_LczDA-yBC2-bkvWwLVtHLgV5yIEuIx2zAN74RQ4eCy1ffWDrVxYJBau4yiIyCc68dsatwHHH6bMK0uI9ib6Y9lsxCYjh-7MFcbP-4UBhgoDDXN9xoUToDLRqR9FTHqAHrGHp7BCdF5d6TQTVL5fmmg61MrLucOo-LZBXs1NY" -} -``` - -The `refresh_token` also expires at a set interval, but a longer interval. Once it expires it will no longer be accepted by Harper. This duration defaults to thirty days, and is configurable in [harperdb-config.yaml](../../deployments/configuration). To generate a new `operation_token` and a new `refresh_token` the `create_authentication_tokens` operation is called. - -## Configuration - -Token timeouts are configurable in [harperdb-config.yaml](../../deployments/configuration) with the following parameters: - -- `operationsApi.authentication.operationTokenTimeout`: Defines the length of time until the operation_token expires (default 1d). -- `operationsApi.authentication.refreshTokenTimeout`: Defines the length of time until the refresh_token expires (default 30d). - -A full list of valid values for both parameters can be found [here](https://github.com/vercel/ms). diff --git a/versioned_docs/version-4.7/developers/security/mtls-auth.md b/versioned_docs/version-4.7/developers/security/mtls-auth.md deleted file mode 100644 index f757f60d..00000000 --- a/versioned_docs/version-4.7/developers/security/mtls-auth.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: mTLS Authentication ---- - -# mTLS Authentication - -Harper supports mTLS authentication for incoming connections. 
When enabled in the [HTTP config settings](../../deployments/configuration#http) the client certificate will be checked against the certificate authority specified with `tls.certificateAuthority`. If the certificate can be properly verified, the connection will authenticate users where the user's id/username is specified by the `CN` (common name) from the client certificate's `subject`, by default. The [HTTP config settings](../../deployments/configuration#http) allow you to determine if mTLS is required for all connections or optional. - -## Certificate Revocation Checking - -When using mTLS authentication, you can optionally enable certificate revocation checking to ensure that revoked certificates cannot be used, even if they are otherwise valid and trusted. This adds an important security layer by checking whether certificates have been explicitly revoked by the issuing Certificate Authority. - -Harper supports both CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) for checking certificate revocation status, using a CRL-first strategy with OCSP fallback for optimal performance and reliability. - -**To enable certificate verification:** - -```yaml -http: - mtls: - required: true - certificateVerification: true # Enable revocation checking -``` - -Certificate revocation checking is **disabled by default** and must be explicitly enabled. For detailed information about certificate revocation checking, including configuration options, performance considerations, and best practices, see [Certificate Management - Certificate Revocation Checking](./certificate-management#certificate-revocation-checking). 
diff --git a/versioned_docs/version-4.7/developers/security/users-and-roles.md b/versioned_docs/version-4.7/developers/security/users-and-roles.md deleted file mode 100644 index cff17e5a..00000000 --- a/versioned_docs/version-4.7/developers/security/users-and-roles.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -title: Users & Roles ---- - -# Users & Roles - -Harper utilizes a Role-Based Access Control (RBAC) framework to manage access to Harper instances. A user is assigned a role that determines the user’s permissions to access database resources and run core operations. - -## Roles in Harper - -Role permissions in Harper are broken into two categories – permissions around database manipulation and permissions around database definition. - -**Database Manipulation**: A role defines CRUD (create, read, update, delete) permissions against database resources (i.e. data) in a Harper instance. - -1. At the table-level access, permissions must be explicitly defined when adding or altering a role – _i.e. Harper will assume CRUD access to be FALSE if not explicitly provided in the permissions JSON passed to the `add_role` and/or `alter_role` API operations._ -1. At the attribute-level, permissions for attributes in all tables included in the permissions set will be assigned based on either the specific attribute-level permissions defined in the table’s permission set or, if there are no attribute-level permissions defined, permissions will be based on the table’s CRUD set. - -**Database Definition**: Permissions related to managing databases, tables, roles, users, and other system settings and operations are restricted to the built-in `super_user` role. - -**Built-In Roles** - -There are three built-in roles within Harper. See full breakdown of operations restricted to only super_user roles [here](users-and-roles#role-based-operation-restrictions). 
- -- `super_user` - This role provides full access to all operations and methods within a Harper instance, this can be considered the admin role. - - This role provides full access to all Database Definition operations and the ability to run Database Manipulation operations across the entire database schema with no restrictions. -- `cluster_user` - This role is an internal system role type that is managed internally to allow clustered instances to communicate with one another. - - This role is an internally managed role to facilitate communication between clustered instances. -- `structure_user` - This role provides specific access for creation and deletion of data. - - When defining this role type you can either assign a value of true which will allow the role to create and drop databases & tables. Alternatively the role type can be assigned a string array. The values in this array are databases and allows the role to only create and drop tables in the designated databases. - -**User-Defined Roles** - -In addition to built-in roles, admins (i.e. users assigned to the super_user role) can create customized roles for other users to interact with and manipulate the data within explicitly defined tables and attributes. - -- Unless the user-defined role is given `super_user` permissions, permissions must be defined explicitly within the request body JSON. -- Describe operations will return metadata for all databases, tables, and attributes that a user-defined role has CRUD permissions for. - -**Role Permissions** - -When creating a new, user-defined role in a Harper instance, you must provide a role name and the permissions to assign to that role. _Reminder, only super users can create and manage roles._ - -- `role` name used to easily identify the role assigned to individual users. 
- _Roles can be altered/dropped based on the role name used in and returned from a successful `add_role` , `alter_role`, or `list_roles` operation._ - -- `permissions` used to explicitly define CRUD access to existing table data. - -Example JSON for `add_role` request - -```json -{ - "operation": "add_role", - "role": "software_developer", - "permission": { - "super_user": false, - "database_name": { - "tables": { - "table_name1": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [ - { - "attribute_name": "attribute1", - "read": true, - "insert": true, - "update": true - } - ] - }, - "table_name2": { - "read": true, - "insert": true, - "update": true, - "delete": false, - "attribute_permissions": [] - } - } - } - } -} -``` - -**Setting Role Permissions** - -There are two parts to a permissions set: - -- `super_user` – boolean value indicating if role should be provided super_user access. - - _If `super_user` is set to true, there should be no additional database-specific permissions values included since the role will have access to the entire database schema. If permissions are included in the body of the operation, they will be stored within Harper, but ignored, as super_users have full access to the database._ - -- `permissions`: Database tables that a role should have specific CRUD access to should be included in the final, database-specific `permissions` JSON. - - _For user-defined roles (i.e. non-super_user roles), blank permissions will result in the user being restricted from accessing any of the database schema._ - -**Table Permissions JSON** - -Each table that a role should be given some level of CRUD permissions to must be included in the `tables` array for its database in the roles permissions JSON passed to the API (_see example above_). 
- -```jsonc -{ - "table_name": { // the name of the table to define CRUD perms for - "read": boolean, // access to read from this table - "insert": boolean, // access to insert data to table - "update": boolean, // access to update data in table - "delete": boolean, // access to delete row data in table - "attribute_permissions": [ // permissions for specific table attributes - { - "attribute_name": "attribute_name", // attribute to assign permissions to - "read": boolean, // access to read this attribute from table - "insert": boolean, // access to insert this attribute into the table - "update": boolean // access to update this attribute in the table - } - ] - } -} -``` - -**Important Notes About Table Permissions** - -1. If a database and/or any of its tables are not included in the permissions JSON, the role will not have any CRUD access to the database and/or tables. -1. If a table-level CRUD permission is set to false, any attribute-level with that same CRUD permission set to true will return an error. - -**Important Notes About Attribute Permissions** - -1. If there are attribute-specific CRUD permissions that need to be enforced on a table, those need to be explicitly described in the `attribute_permissions` array. -1. If a non-hash attribute is given some level of CRUD access, that same access will be assigned to the table’s `hash_attribute` (also referred to as the `primary_key`), even if it is not explicitly defined in the permissions JSON. - - _See table_name1’s permission set for an example of this – even though the table’s hash attribute is not specifically defined in the attribute_permissions array, because the role has CRUD access to ‘attribute1’, the role will have the same access to the table’s hash attribute._ - -1. If attribute-level permissions are set – _i.e. attribute_permissions.length > 0_ – any table attribute not explicitly included will be assumed to have no CRUD access (with the exception of the `hash_attribute` described in #2). 
- - _See table_name1’s permission set for an example of this – in this scenario, the role will have the ability to create, insert and update ‘attribute1’ and the table’s hash attribute but no other attributes on that table._ - -1. If an `attribute_permissions` array is empty, the role’s access to a table’s attributes will be based on the table-level CRUD permissions. - - _See table_name2’s permission set for an example of this._ - -1. The `__createdtime__` and `__updatedtime__` attributes that Harper manages internally can have read perms set but, if set, all other attribute-level permissions will be ignored. -1. Please note that DELETE permissions are not included as a part of an individual attribute-level permission set. That is because it is not possible to delete individual attributes from a row, rows must be deleted in full. - - If a role needs the ability to delete rows from a table, that permission should be set on the table-level. - - The practical approach to deleting an individual attribute of a row would be to set that attribute to null via an update statement. - -## Role-Based Operation Restrictions - -The table below includes all API operations available in Harper and indicates whether or not the operation is restricted to super_user roles. 
- -_Keep in mind that non-super_user roles will also be restricted within the operations they do have access to by the database-level CRUD permissions set for the roles._ - -| Databases and Tables | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| describe_all | | -| describe_database | | -| describe_table | | -| create_database | X | -| drop_database | X | -| create_table | X | -| drop_table | X | -| create_attribute | | -| drop_attribute | X | - -| NoSQL Operations | Restricted to Super_Users | -| -------------------- | :-----------------------: | -| insert | | -| update | | -| upsert | | -| delete | | -| search_by_hash | | -| search_by_value | | -| search_by_conditions | | - -| SQL Operations | Restricted to Super_Users | -| -------------- | :-----------------------: | -| select | | -| insert | | -| update | | -| delete | | - -| Bulk Operations | Restricted to Super_Users | -| --------------- | :-----------------------: | -| csv_data_load | | -| csv_file_load | | -| csv_url_load | | -| import_from_s3 | | - -| Users and Roles | Restricted to Super_Users | -| --------------- | :-----------------------: | -| list_roles | X | -| add_role | X | -| alter_role | X | -| drop_role | X | -| list_users | X | -| user_info | | -| add_user | X | -| alter_user | X | -| drop_user | X | - -| Clustering | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| cluster_set_routes | X | -| cluster_get_routes | X | -| cluster_delete_routes | X | -| add_node | X | -| update_node | X | -| cluster_status | X | -| remove_node | X | -| configure_cluster | X | - -| Components | Restricted to Super_Users | -| ------------------ | :-----------------------: | -| get_components | X | -| get_component_file | X | -| set_component_file | X | -| drop_component | X | -| add_component | X | -| package_component | X | -| deploy_component | X | - -| Custom Functions | Restricted to Super_Users | -| ------------------------------- | 
:-----------------------: | -| custom_functions_status | X | -| get_custom_functions | X | -| get_custom_function | X | -| set_custom_function | X | -| drop_custom_function | X | -| add_custom_function_project | X | -| drop_custom_function_project | X | -| package_custom_function_project | X | -| deploy_custom_function_project | X | - -| Registration | Restricted to Super_Users | -| ----------------- | :-----------------------: | -| registration_info | | -| get_fingerprint | X | -| set_license | X | - -| Jobs | Restricted to Super_Users | -| ------------------------- | :-----------------------: | -| get_job | | -| search_jobs_by_start_date | X | - -| Logs | Restricted to Super_Users | -| ------------------------------ | :-----------------------: | -| read_log | X | -| read_transaction_log | X | -| delete_transaction_logs_before | X | -| read_audit_log | X | -| delete_audit_logs_before | X | - -| Utilities | Restricted to Super_Users | -| --------------------- | :-----------------------: | -| delete_records_before | X | -| export_local | X | -| export_to_s3 | X | -| system_information | X | -| restart | X | -| restart_service | X | -| get_configuration | X | -| configure_cluster | X | - -| Token Authentication | Restricted to Super_Users | -| ---------------------------- | :-----------------------: | -| create_authentication_tokens | | -| refresh_operation_token | | - -## Error: Must execute as User - -**You may have gotten an error like,** `Error: Must execute as <>`. - -This means that you installed Harper as `<>`. Because Harper stores files natively on the operating system, we only allow the Harper executable to be run by a single user. This prevents permissions issues on files. - -For example if you installed as user_a, but later wanted to run as user_b. User_b may not have access to the hdb files Harper needs. This also keeps Harper more secure as it allows you to lock files down to a specific user and prevents other users from accessing your files. 
diff --git a/versioned_docs/version-4.7/index.mdx b/versioned_docs/version-4.7/index.mdx deleted file mode 100644 index a46de397..00000000 --- a/versioned_docs/version-4.7/index.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: Harper Docs ---- - -import CustomDocCardList from '@site/src/components/CustomDocCardList'; - -# Harper Docs - -:::info - -### Get the Most Out of Harper - -Join our Discord to access expert support, collaborate with Harper’s core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. - -Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. - -## Getting Started - -The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. 
- -## Building with Harper - - diff --git a/versioned_docs/version-4.7/reference/_category_.json b/versioned_docs/version-4.7/reference/_category_.json deleted file mode 100644 index 1a36ae90..00000000 --- a/versioned_docs/version-4.7/reference/_category_.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "link": { - "type": "doc", - "id": "reference/index" - } -} diff --git a/versioned_docs/version-4.7/reference/analytics.md b/versioned_docs/version-4.7/reference/analytics.md deleted file mode 100644 index 742a299d..00000000 --- a/versioned_docs/version-4.7/reference/analytics.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: Analytics ---- - -# Analytics - -Harper provides extensive telemetry and analytics data to help monitor the status of the server and work loads, and to help understand traffic and usage patterns to identify issues and scaling needs, and identify queries and actions that are consuming the most resources. - -Harper collects statistics for all operations, URL endpoints, and messaging topics, aggregating information by thread, operation, resource, and methods, in real-time. These statistics are logged in the `hdb_raw_analytics` and `hdb_analytics` table in the `system` database. - -There are two "levels" of analytics in the Harper analytics table: the first is the immediate level of raw direct logging of real-time statistics. These analytics entries are recorded once a second (when there is activity) by each thread, and include all recorded activity in the last second, along with system resource information. The records have a primary key that is the timestamp in milliseconds since epoch. 
This can be queried (with `superuser` permission) using the search_by_conditions operation (this will search for 10 seconds worth of analytics) on the `hdb_raw_analytics` table: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_raw_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [168859400000, 1688594010000] - }] -} -``` - -And a typical response looks like: - -``` -{ - "time": 1688594390708, - "period": 1000.8336279988289, - "metrics": [ - { - "metric": "bytes-sent", - "path": "search_by_conditions", - "type": "operation", - "median": 202, - "mean": 202, - "p95": 202, - "p90": 202, - "count": 1 - }, - ... - { - "metric": "memory", - "threadId": 2, - "rss": 1492664320, - "heapTotal": 124596224, - "heapUsed": 119563120, - "external": 3469790, - "arrayBuffers": 798721 - }, - { - "metric": "utilization", - "idle": 138227.52767700003, - "active": 70.5066209952347, - "utilization": 0.0005098165086230495 - } - ], - "threadId": 2, - "totalBytesProcessed": 12182820, - "id": 1688594390708.6853 -} -``` - -The second level of analytics recording is aggregate data. The aggregate records are recorded once a minute, and aggregate the results from all the per-second entries from all the threads, creating a summary of statistics once a minute. The ids for these milliseconds since epoch can be queried from the `hdb_analytics` table. 
You can query these with an operation like: - -``` -POST http://localhost:9925 -Content-Type: application/json - -{ - "operation": "search_by_conditions", - "schema": "system", - "table": "hdb_analytics", - "conditions": [{ - "search_attribute": "id", - "search_type": "between", - "search_value": [1688194100000, 1688594990000] - }] -} -``` - -And a summary record looks like: - -``` -{ - "period": 60000, - "metric": "bytes-sent", - "method": "connack", - "type": "mqtt", - "median": 4, - "mean": 4, - "p95": 4, - "p90": 4, - "count": 1, - "id": 1688589569646, - "time": 1688589569646 -} -``` - -# Standard Analytics Metrics - -While applications can define their own metrics, Harper provides a set of standard metrics that are tracked for all services: - -## HTTP - -The following metrics are tracked for all HTTP requests: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ------------------ | ------------- | -------------- | ---------------------------------------------- | ------------ | ------------------------------------------------------- | -| `duration` | resource path | request method | `cache-hit` or `cache-miss` if a caching table | milliseconds | Duration of request handler | -| `duration` | route path | request method | fastify-route | milliseconds | | -| `duration` | operation | | operation | milliseconds | | -| `success` | resource path | request method | | % | | -| `success` | route path | request method | fastify-route | % | | -| `success` | operation | | operation | % | | -| `bytes-sent` | resource path | request method | | bytes | | -| `bytes-sent` | route path | request method | fastify-route | bytes | | -| `bytes-sent` | operation | | operation | bytes | | -| `transfer` | resource path | request method | operation | milliseconds | duration of transfer | -| `transfer` | route path | request method | fastify-route | milliseconds | duration of transfer | -| `transfer` | operation | | operation | milliseconds | duration of transfer | -| 
`socket-routed` | | | | % | percentage of sockets that could be immediately routed | -| `tls-handshake` | | | | milliseconds | | -| `tls-reused` | | | | % | percentage of TLS that reuses sessions | -| `cache-hit` | table name | | | % | The percentage of cache hits | -| `cache-resolution` | table name | | | milliseconds | The duration of resolving requests for uncached entries | - -The following are metrics for real-time MQTT connections: -| `metric` | `path` | `method` | `type` | Unit | Description | -|---|---|---|---|---|---| -| `mqtt-connections` | | | | count | The number of open direct MQTT connections | -| `ws-connections` | | | | count | number of open WS connections| -| `connection` | `mqtt` | `connect` | | % | percentage of successful direct MQTT connections | -| `connection` | `mqtt` | `disconnect` | | % | percentage of explicit direct MQTT disconnects | -| `connection` | `ws` | `connect` | | % | percentage of successful WS connections | -| `connection` | `ws` | `disconnect` | | % | percentage of explicit WS disconnects | -| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | The number of bytes sent for a given command and topic | - -The following are metrics for replication: - -| `metric` | `path` | `method` | `type` | Unit | Description | -| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------------------------- | -| `bytes-sent` | node.database | `replication` | `egress` | bytes | The number of bytes sent for replication | -| `bytes-sent` | node.database | `replication` | `blob` | bytes | The number of bytes sent for replication of blobs | -| `bytes-received` | node.database | `replication` | `ingress` | bytes | The number of bytes received for replication | -| `bytes-received` | node.database | `replication` | `blob` | bytes | The number of bytes received for replication of blobs | - -The following are general resource usage statistics that are tracked: - -| `metric` | primary attribute(s) | 
other attribute(s) | Unit | Description | -| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------- | -| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | The size of the database in bytes | -| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage; including idle time, active time, task queue latency, RSS, heap, buffer and external memory usage | -| `resource-usage` | | | various | [See breakout below](#resource-usage) | -| `storage-volume` | `available`, `free`, `size` | `database` | bytes | The size of the storage volume in bytes | -| `table-size` | `size` | `database`, `table` | bytes | The size of the table in bytes | -| `utilization` | | | % | How much of the time the worker was processing requests | - - -`resource-usage` metrics are everything returned by [node:process.resourceUsage()](https://nodejs.org/api/process.html#processresourceusage)[^1] plus the following additional metrics: - -| `metric` | Unit | Description | -| ---------------- | ---- | ----------------------------------------------------- | -| `time` | ms | Current time when metric was recorded (Unix time) | -| `period` | ms | Duration of the metric period | -| `cpuUtilization` | % | CPU utilization percentage (user and system combined) | - -[^1]: The `userCPUTime` and `systemCPUTime` metrics are converted to milliseconds to match the other time-related metrics. 
diff --git a/versioned_docs/version-4.7/reference/architecture.md b/versioned_docs/version-4.7/reference/architecture.md deleted file mode 100644 index 4155d5ff..00000000 --- a/versioned_docs/version-4.7/reference/architecture.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: Architecture ---- - -# Architecture - -Harper's architecture consists of resources, which includes tables and user defined data sources and extensions, and server interfaces, which includes the RESTful HTTP interface, operations API, and MQTT. Servers are supported by routing and auth services. - -``` - ┌──────────┐ ┌──────────┐ - │ Clients │ │ Clients │ - └────┬─────┘ └────┬─────┘ - │ │ - ▼ ▼ - ┌────────────────────────────────────────┐ - │ │ - │ Socket routing/management │ - ├───────────────────────┬────────────────┤ - │ │ │ - │ Server Interfaces ─►│ Authentication │ - │ RESTful HTTP, MQTT │ Authorization │ - │ ◄─┤ │ - │ ▲ └────────────────┤ - │ │ │ │ - ├───┼──────────┼─────────────────────────┤ - │ │ │ ▲ │ - │ ▼ Resources ▲ │ ┌───────────┐ │ - │ │ └─┤ │ │ - ├─────────────────┴────┐ │ App │ │ - │ ├─►│ resources │ │ - │ Database tables │ └───────────┘ │ - │ │ ▲ │ - ├──────────────────────┘ │ │ - │ ▲ ▼ │ │ - │ ┌────────────────┐ │ │ - │ │ External │ │ │ - │ │ data sources ├────┘ │ - │ │ │ │ - │ └────────────────┘ │ - │ │ - └────────────────────────────────────────┘ -``` diff --git a/versioned_docs/version-4.7/reference/blob.md b/versioned_docs/version-4.7/reference/blob.md deleted file mode 100644 index 57dd7081..00000000 --- a/versioned_docs/version-4.7/reference/blob.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: Blob ---- - -# Blob - -Blobs are binary large objects that can be used to store any type of unstructured/binary data and is designed for large content. Blobs support streaming and feature better performance for content larger than about 20KB. Blobs are built off the native JavaScript `Blob` type, and HarperDB extends the native `Blob` type for integrated storage with the database. 
To use blobs, you would generally want to declare a field as a `Blob` type in your schema: - -```graphql -type MyTable { - id: Any! @primaryKey - data: Blob -} -``` - -You can then create a blob which writes the binary data to disk, and can then be included (as a reference) in a record. For example, you can create a record with a blob like: - -```javascript -let blob = createBlob(largeBuffer); -await MyTable.put({ id: 'my-record', data: blob }); -``` - -The `data` attribute in this example is a blob reference, and can be used like any other attribute in the record, but it is stored separately, and the data must be accessed asynchronously. You can retrieve the blob data with the standard `Blob` methods: - -```javascript -let buffer = await blob.bytes(); -``` - -If you are creating a resource method, you can return a `Response` object with a blob as the body: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - let record = super.get(target); - return { - status: 200, - headers: {}, - body: record.data, // record.data is a blob - }; - } -} -``` - -When using the exported REST APIs for your tables, blobs will by default be treated with a UTF-8 encoding and contain text/plain content. - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "Why hello there, world!" - }' -``` - -To store arbitrary binary content (such as audio data) in a blob, using CBOR is recommended when making API requests. This will let you control the contents of the blob precisely. 
- -If you need to use JSON, Base64 encoding your contents can be a great choice, but you'll need to do a bit of work to control the encoding of the underlying blob: - -```typescript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; - - create(target: RequestTarget, record: Partial) { - if (record.data) { - record.data = Buffer.from(record.data, 'base64'); - } - return super.create(target, record); - } -} -``` - -Now you can create records and they'll be encoded appropriately. For example, here's a small .jpg encoded in base64: - -```bash -curl -X POST --location "http://localhost:9926/MyTable/" \ - -H "Content-Type: application/json" \ - -d '{ - "data": "/9j/4QDKRXhpZgAATU0AKgAAAAgABgESAAMAAAABAAEAAAEaAAUAAAABAAAAVgEbAAUAAAABAAAAXgEoAAMAAAABAAIAAAITAAMAAAABAAEAAIdpAAQAAAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAeQAAAHAAAABDAyMjGRAQAHAAAABAECAwCgAAAHAAAABDAxMDCgAQADAAAAAQABAACgAgAEAAAAAQAAABCgAwAEAAAAAQAAABCkBgADAAAAAQAAAAAAAAAAAAD/2wCEAAEBAQEBAQIBAQIDAgICAwQDAwMDBAYEBAQEBAYHBgYGBgYGBwcHBwcHBwcICAgICAgJCQkJCQsLCwsLCwsLCwsBAgICAwMDBQMDBQsIBggLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLC//dAAQAAf/AABEIABAAEAMBIgACEQEDEQH/xAGiAAABBQEBAQEBAQAAAAAAAAAAAQIDBAUGBwgJCgsQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/APz68CaN8Mp/DWveJviDqE0R0qGIwWsGEaR532J83uwwABXH+MtP8N6Hryad4cvJrm3lgjlX7WES4R2zujcIAvy8YIHQ+1eYeKdAu9VtTNpUvk3aAeWSxCblOVJA4O08jIrR0/R1txDc37m4u0QK8p7tjkgdBmv2zD4apGvUq1KjcXtHTTRWP0nEUqzxcatKbUEkuWy5fN3+Lmvt0tp2t//Z" - }' -``` - -One of the 
important characteristics of blobs is they natively support asynchronous streaming of data. This is important for both creation and retrieval of large data. When we create a blob with `createBlob`, the returned blob will create the storage entry, but the data will be streamed to storage. This means that you can create a blob from a buffer or from a stream. You can also create a record that references a blob before the blob is fully written to storage. For example, you can create a blob from a stream: - -```javascript -let blob = createBlob(stream); -// at this point the blob exists, but the data is still being written to storage -await MyTable.put({ id: 'my-record', data: blob }); -// we now have written a record that references the blob -let record = await MyTable.get('my-record'); -// we now have a record that gives us access to the blob. We can asynchronously access the blob's data or stream the data, and it will be available as the stream is written to the blob. -let stream = record.data.stream(); -``` - -This can be powerful functionality for large media content, where content can be streamed into storage and streamed out in real-time to users as it is received, or even for web content where low latency transmission of data from origin is critical. However, this also means that blobs are _not_ atomic or [ACID](https://en.wikipedia.org/wiki/ACID) compliant; streaming functionality achieves the opposite behavior of ACID/atomic writes that would prevent access to data as it is being written, and wait until data is fully available before a commit. 
Alternately, we can also use the `saveBeforeCommit` flag to indicate that the blob should be fully written to storage before committing a transaction to ensure that the whole blob is available before the transaction commits and writes the record: - -```javascript -let blob = createBlob(stream, { saveBeforeCommit: true }); -// this put will not commit and resolve until the blob is written and then the record is written -await MyTable.put({ id: 'my-record', data: blob }); -``` - -Note that using `saveBeforeCommit` does not necessarily guarantee full ACID compliance. This can be combined with the `flush` flag to provide a stronger guarantee that a blob is flushed to disk before committing a transaction. However, the error handling below provides a stronger guarantee of proper blob handling when the process of streaming/writing a blob is interrupted and using proper error handling is recommended, instead of relying on `saveBeforeCommit`, for the best combination of reliability and performance. - -### Error Handling - -Because blobs can be streamed and referenced prior to their completion, there is a chance that an error or interruption could occur while streaming data to the blob (after the record is committed). We can create an error handler for the blob to handle the case of an interrupted blob: - -```javascript -export class MyEndpoint extends MyTable { - async get(target) { - const record = super.get(target); - let blob = record.data; - blob.on('error', () => { - // if this was a caching table, we may want to invalidate or delete this record: - MyTable.invalidate(target); - // we may want to re-retrieve the blob - }); - return { - status: 200, - headers: {}, - body: blob - }; - } -} -``` - -### Blob `size` - -Blobs that are created from streams may not have the standard `size` property available, because the size may not be known while data is being streamed. Consequently, the `size` property may be undefined until the size is determined. 
You can listen for the `size` event to be notified when the size is available: - -```javascript -let record = await MyTable.get('my-record'); -let blob = record.data; -blob.size; // will be available if it was saved with a known size -let stream = blob.stream(); // start streaming the data -if (blob.size === undefined) { - blob.on('size', (size) => { - // will be called once the size is available - }); -} -``` - -### Blob Coercion - -When a field is defined to use the `Blob` type, any strings or buffers that are assigned to that field in a `put`, `patch`, or `publish`, will automatically be coerced to a `Blob`. This makes it easy to use a `Blob` type even with JSON data that may come from HTTP request bodies or MQTT messages, that do not natively support a `Blob` type. - -See the [configuration](../deployments/configuration) documentation for more information on configuring where blobs are stored. diff --git a/versioned_docs/version-4.7/reference/clustering/certificate-management.md b/versioned_docs/version-4.7/reference/clustering/certificate-management.md deleted file mode 100644 index a11a1a35..00000000 --- a/versioned_docs/version-4.7/reference/clustering/certificate-management.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: Certificate Management ---- - -# Certificate Management - -## Development - -Out of the box Harper generates certificates that are used when Harper nodes are clustered together to securely share data between nodes. These certificates are meant for testing and development purposes. 
Because these certificates do not have Common Names (CNs) that will match the Fully Qualified Domain Name (FQDN) of the Harper node, the following settings (see the full [configuration file](../../deployments/configuration) docs for more details) are defaulted & recommended for ease of development: - -``` -clustering: - tls: - certificate: ~/hdb/keys/certificate.pem - certificateAuthority: ~/hdb/keys/ca.pem - privateKey: ~/hdb/keys/privateKey.pem - insecure: true - verify: true -``` - -The certificates that Harper generates are stored in your `/keys/`. - -`insecure` is set to `true` to accept the certificate CN mismatch due to development certificates. - -`verify` is set to `true` to enable mutual TLS between the nodes. - -## Production - -In a production environment, we recommend using your own certificate authority (CA), or a public CA such as LetsEncrypt to generate certs for your Harper cluster. This will let you generate certificates with CNs that match the FQDN of your nodes. - -Once you generate new certificates, to make Harper start using them you can either replace the generated files with your own, or update the configuration to point to your new certificates, and then restart Harper. - -Since these new certificates can be issued with correct CNs, you should set `insecure` to `false` so that nodes will do full validation of the certificates of the other nodes. - -### Certificate Revocation Checking - -Harper automatically performs certificate revocation checking using OCSP (Online Certificate Status Protocol) for all cluster connections. This critical security feature ensures that: - -- Revoked certificates cannot be used for cluster communication -- Compromised nodes can be quickly isolated by revoking their certificates -- Certificate status is verified in real-time with the Certificate Authority - -Certificate verification is enabled by default for cluster connections and follows the same configuration as HTTP mTLS connections. 
The verification settings can be customized in the HTTP configuration section to balance security requirements with performance considerations. - -For production clusters, consider using `failureMode: fail-closed` to ensure maximum security by rejecting connections when OCSP verification cannot be completed. - -### Certificate Requirements - -- Certificates must have an `Extended Key Usage` that defines both `TLS Web Server Authentication` and `TLS Web Client Authentication` as these certificates will be used to accept connections from other Harper nodes and to make requests to other Harper nodes. Example: - -``` -X509v3 Key Usage: critical - Digital Signature, Key Encipherment -X509v3 Extended Key Usage: - TLS Web Server Authentication, TLS Web Client Authentication -``` - -- If you are using an intermediate CA to issue the certificates, the entire certificate chain (to the root CA) must be included in the `certificateAuthority` file. -- If your certificates expire you will need a way to issue new certificates to the nodes and then restart Harper. If you are using a public CA such as LetsEncrypt, a tool like `certbot` can be used to renew certificates. - -### Certificate Troubleshooting - -If you are having TLS issues with clustering, use the following steps to verify that your certificates are valid. - -1. Make sure certificates can be parsed and that you can view the contents: - -``` -openssl x509 -in .pem -noout -text` -``` - -1. Make sure the certificate validates with the CA: - -``` -openssl verify -CAfile .pem .pem` -``` - -1. 
Make sure the certificate and private key are a valid pair by verifying that the output of the following commands match: - -``` -openssl rsa -modulus -noout -in .pem | openssl md5 -openssl x509 -modulus -noout -in .pem | openssl md5 -``` diff --git a/versioned_docs/version-4.7/reference/clustering/creating-a-cluster-user.md b/versioned_docs/version-4.7/reference/clustering/creating-a-cluster-user.md deleted file mode 100644 index 0a8b2a6c..00000000 --- a/versioned_docs/version-4.7/reference/clustering/creating-a-cluster-user.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Creating a Cluster User ---- - -# Creating a Cluster User - -Inter-node authentication takes place via Harper users. There is a special role type called `cluster_user` that exists by default and limits the user to only clustering functionality. - -A `cluster_user` must be created and added to the `harperdb-config.yaml` file for clustering to be enabled. - -All nodes that are intended to be clustered together need to share the same `cluster_user` credentials (i.e. username and password). - -There are multiple ways a `cluster_user` can be created, they are: - -1. Through the operations API by calling `add_user` - -```json -{ - "operation": "add_user", - "role": "cluster_user", - "username": "cluster_account", - "password": "letsCluster123!", - "active": true -} -``` - -When using the API to create a cluster user the `harperdb-config.yaml` file must be updated with the username of the new cluster user. - -This can be done through the API by calling `set_configuration` or by editing the `harperdb-config.yaml` file. - -```json -{ - "operation": "set_configuration", - "clustering_user": "cluster_account" -} -``` - -In the `harperdb-config.yaml` file under the top-level `clustering` element there will be a user element. Set this to the name of the cluster user. 
- -```yaml -clustering: - user: cluster_account -``` - -_Note: When making any changes to the `harperdb-config.yaml` file, Harper must be restarted for the changes to take effect._ - -1. Upon installation using **command line variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -_Note: Using command line or environment variables for setting the cluster user only works on install._ - -``` -harperdb install --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` - -1. Upon installation using **environment variables**. This will automatically set the user in the `harperdb-config.yaml` file. - -``` -CLUSTERING_USER=cluster_account CLUSTERING_PASSWORD=letsCluster123 -``` diff --git a/versioned_docs/version-4.7/reference/clustering/enabling-clustering.md b/versioned_docs/version-4.7/reference/clustering/enabling-clustering.md deleted file mode 100644 index 606bc29c..00000000 --- a/versioned_docs/version-4.7/reference/clustering/enabling-clustering.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Enabling Clustering ---- - -# Enabling Clustering - -Clustering does not run by default; it needs to be enabled. - -To enable clustering the `clustering.enabled` configuration element in the `harperdb-config.yaml` file must be set to `true`. - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file and setting enabled to `true` - -```yaml -clustering: - enabled: true -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_enabled": true -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. Using **command line variables**. - -``` -harperdb --CLUSTERING_ENABLED true -``` - -1. 
Using **environment variables**. - -``` -CLUSTERING_ENABLED=true -``` - -An efficient way to **install Harper**, **create the cluster user**, **set the node name** and **enable clustering** in one operation is to combine the steps using command line and/or environment variables. Here is an example using command line variables. - -``` -harperdb install --CLUSTERING_ENABLED true --CLUSTERING_NODENAME Node1 --CLUSTERING_USER cluster_account --CLUSTERING_PASSWORD letsCluster123! -``` diff --git a/versioned_docs/version-4.7/reference/clustering/establishing-routes.md b/versioned_docs/version-4.7/reference/clustering/establishing-routes.md deleted file mode 100644 index 1d4d5ae2..00000000 --- a/versioned_docs/version-4.7/reference/clustering/establishing-routes.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Establishing Routes ---- - -# Establishing Routes - -A route is a connection between two nodes. It is how the clustering network is established. - -Routes do not need to cross connect all nodes in the cluster. You can select a leader node or a few leaders and all nodes connect to them, you can chain, etc… As long as there is one route connecting a node to the cluster all other nodes should be able to reach that node. - -Using routes the clustering servers will create a mesh network between nodes. This mesh network ensures that if a node drops out all other nodes can still communicate with each other. That being said, we recommend designing your routing with failover in mind, this means not storing all your routes on one node but dispersing them throughout the network. - -A simple route example is a two node topology, if Node1 adds a route to connect it to Node2, Node2 does not need to add a route to Node1. That one route configuration is all that’s needed to establish a bidirectional connection between the nodes. - -A route consists of a `port` and a `host`. - -`port` - the clustering port of the remote instance you are creating the connection with. 
This is going to be the `clustering.hubServer.cluster.network.port` in the Harper configuration on the node you are connecting with. - -`host` - the host of the remote instance you are creating the connection with.This can be an IP address or a URL. - -Routes are set in the `harperdb-config.yaml` file using the `clustering.hubServer.cluster.network.routes` element, which expects an object array, where each object has two properties, `port` and `host`. - -```yaml -clustering: - hubServer: - cluster: - network: - routes: - - host: 3.62.184.22 - port: 9932 - - host: 3.735.184.8 - port: 9932 -``` - -![figure 1](/img/v4.6/clustering/figure1.png) - -This diagram shows one way of using routes to connect a network of nodes. Node2 and Node3 do not reference any routes in their config. Node1 contains routes for Node2 and Node3, which is enough to establish a network between all three nodes. - -There are multiple ways to set routes, they are: - -1. Directly editing the `harperdb-config.yaml` file (refer to code snippet above). -1. Calling `cluster_set_routes` through the API. - -```json -{ - "operation": "cluster_set_routes", - "server": "hub", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` - -_Note: When making any changes to Harper configuration Harper must be restarted for the changes to take effect._ - -1. From the command line. - -```bash ---CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES "[{\"host\": \"3.735.184.8\", \"port\": 9932}]" -``` - -1. Using environment variables. - -```bash -CLUSTERING_HUBSERVER_CLUSTER_NETWORK_ROUTES=[{"host": "3.735.184.8", "port": 9932}] -``` - -The API also has `cluster_get_routes` for getting all routes in the config and `cluster_delete_routes` for deleting routes. 
- -```json -{ - "operation": "cluster_delete_routes", - "routes": [{ "host": "3.735.184.8", "port": 9932 }] -} -``` diff --git a/versioned_docs/version-4.7/reference/clustering/index.md b/versioned_docs/version-4.7/reference/clustering/index.md deleted file mode 100644 index fddd3851..00000000 --- a/versioned_docs/version-4.7/reference/clustering/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: NATS Clustering ---- - -# NATS Clustering - -Harper 4.0 - 4.3 used a clustering system based on NATS for replication. In 4.4+, Harper has moved to a new native replication system that has better performance, reliability, and data consistency. This document describes the legacy NATS clustering system. Harper clustering is the process of connecting multiple Harper databases together to create a database mesh network that enables users to define data replication patterns. - -Harper’s clustering engine replicates data between instances of Harper using a highly performant, bi-directional pub/sub model on a per-table basis. Data replicates asynchronously with eventual consistency across the cluster following the defined pub/sub configuration. Individual transactions are sent in the order in which they were transacted, once received by the destination instance, they are processed in an ACID-compliant manner. Conflict resolution follows a last writer wins model based on recorded transaction time on the transaction and the timestamp on the record on the node. - ---- - -### Common Use Case - -A common use case is an edge application collecting and analyzing sensor data that creates an alert if a sensor value exceeds a given threshold: - -- The edge application should not be making outbound http requests for security purposes. -- There may not be a reliable network connection. -- Not all sensor data will be sent to the cloud--either because of the unreliable network connection, or maybe it’s just a pain to store it. -- The edge node should be inaccessible from outside the firewall. 
-- The edge node will send alerts to the cloud with a snippet of sensor data containing the offending sensor readings. - -Harper simplifies the architecture of such an application with its bi-directional, table-level replication: - -- The edge instance subscribes to a "thresholds" table on the cloud instance, so the application only makes localhost calls to get the thresholds. -- The application continually pushes sensor data into a "sensor_data" table via the localhost API, comparing it to the threshold values as it does so. -- When a threshold violation occurs, the application adds a record to the "alerts" table. -- The application appends to that record array "sensor_data" entries for the 60 seconds (or minutes, or days) leading up to the threshold violation. -- The edge instance publishes the "alerts" table up to the cloud instance. - -By letting Harper focus on the fault-tolerant logistics of transporting your data, you get to write less code. By moving data only when and where it’s needed, you lower storage and bandwidth costs. And by restricting your app to only making local calls to Harper, you reduce the overall exposure of your application to outside forces. diff --git a/versioned_docs/version-4.7/reference/clustering/managing-subscriptions.md b/versioned_docs/version-4.7/reference/clustering/managing-subscriptions.md deleted file mode 100644 index f043c9d1..00000000 --- a/versioned_docs/version-4.7/reference/clustering/managing-subscriptions.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Managing subscriptions ---- - -Tables are replicated when the table is designated as replicating and there is subscription between the nodes. -Tables designated as replicating by default, but can be changed by setting `replicate` to `false` in the table definition: - -```graphql -type Product @table(replicate: false) { - id: ID! - name: String! 
-} -``` - -Or in your harperdb-config.yaml, you can set the default replication behavior for databases, and indicate which databases -should be replicated by default: - -```yaml -replication: - databases: data -``` - -If a table is not in the list of databases to be replicated, it will not be replicated unless the table is specifically set to replicate: - -```graphql -type Product @table(replicate: true) { - id: ID! - name: String! -} -``` - -Reading hdb*nodes (what we do \_to* the node, not what the node does). - -The subscription can be set to publish, subscribe, or both. - -# Managing subscriptions - -Subscriptions can be added, updated, or removed through the API. - -_Note: The databases and tables in the subscription must exist on either the local or the remote node. Any databases or tables that do not exist on one particular node, for example, the local node, will be automatically created on the local node._ - -To add a single node and create one or more subscriptions use `set_node_replication`. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "data", - "table": "dog", - "publish": false, - "subscribe": true - }, - { - "database": "data", - "table": "chicken", - "publish": true, - "subscribe": true - } - ] -} -``` - -This is an example of adding Node2 to your local node. Subscriptions are created for two tables, dog and chicken. - -To update one or more subscriptions with a single node you can also use `set_node_replication`, however this will behave as a PATCH/upsert, where only the subscription(s) changing will be inserted/update while the others will be left untouched. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ] -} -``` - -This call will update the subscription with the dog table. Any other subscriptions with Node2 will not change. 
- -To add or update subscriptions with one or more nodes in one API call use `configure_cluster`. - -```json -{ - "operation": "configure_cluster", - "connections": [ - { - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": false, - "subscribe": true - }, - { - "database": "prod", - "table": "dog", - "publish": true, - "subscribe": true - } - ] - }, - { - "node_name": "Node3", - "subscriptions": [ - { - "database": "dev", - "table": "chicken", - "publish": true, - "subscribe": false - } - ] - } - ] -} -``` - -_Note: `configure_cluster` will override **any and all** existing subscriptions defined on the local node. This means that before going through the connections in the request and adding the subscriptions, it will first go through **all existing subscriptions the local node has** and remove them. To get all existing subscriptions use `cluster_status`._ - -#### Start time - -There is an optional property called `start_time` that can be passed in the subscription. This property accepts an ISO formatted UTC date. - -`start_time` can be used to set from what time you would like to source transactions from a table when creating or updating a subscription. - -```json -{ - "operation": "set_node_replication", - "node_name": "Node2", - "subscriptions": [ - { - "database": "dev", - "table": "dog", - "publish": false, - "subscribe": true, - "start_time": "2022-09-02T20:06:35.993Z" - } - ] -} -``` - -This example will get all transactions on Node2’s dog table starting from `2022-09-02T20:06:35.993Z` and replicate them locally on the dog table. - -If no start time is passed it defaults to the current time. - -_Note: start time utilizes clustering to back source transactions. For this reason it can only source transactions that occurred when clustering was enabled._ - -#### Remove node - -To remove a node and all its subscriptions use `remove_node`. 
- -```json -{ - "operation": "remove_node", - "node_name": "Node2" -} -``` - -#### Cluster status - -To get the status of all connected nodes and see their subscriptions use `cluster_status`. - -```json -{ - "node_name": "Node1", - "is_enabled": true, - "connections": [ - { - "node_name": "Node2", - "status": "open", - "ports": { - "clustering": 9932, - "operations_api": 9925 - }, - "latency_ms": 65, - "uptime": "11m 19s", - "subscriptions": [ - { - "schema": "dev", - "table": "dog", - "publish": true, - "subscribe": true - } - ], - "system_info": { - "hdb_version": "4.0.0", - "node_version": "16.17.1", - "platform": "linux" - } - } - ] -} -``` diff --git a/versioned_docs/version-4.7/reference/clustering/naming-a-node.md b/versioned_docs/version-4.7/reference/clustering/naming-a-node.md deleted file mode 100644 index 7a512efb..00000000 --- a/versioned_docs/version-4.7/reference/clustering/naming-a-node.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Naming a Node ---- - -# Naming a Node - -Node name is the name given to a node. It is how nodes are identified within the cluster and must be unique to the cluster. - -The name cannot contain any of the following characters: `.,*>` . Dot, comma, asterisk, greater than, or whitespace. - -The name is set in the `harperdb-config.yaml` file using the `clustering.nodeName` configuration element. - -_Note: If you want to change the node name make sure there are no subscriptions in place before doing so. After the name has been changed a full restart is required._ - -There are multiple ways to update this element, they are: - -1. Directly editing the `harperdb-config.yaml` file. - -```yaml -clustering: - nodeName: Node1 -``` - -_Note: When making any changes to the `harperdb-config.yaml` file Harper must be restarted for the changes to take effect._ - -1. Calling `set_configuration` through the operations API - -```json -{ - "operation": "set_configuration", - "clustering_nodeName": "Node1" -} -``` - -1. 
Using command line variables. - -``` -harperdb --CLUSTERING_NODENAME Node1 -``` - -1. Using environment variables. - -``` -CLUSTERING_NODENAME=Node1 -``` diff --git a/versioned_docs/version-4.7/reference/clustering/requirements-and-definitions.md b/versioned_docs/version-4.7/reference/clustering/requirements-and-definitions.md deleted file mode 100644 index 22bc3977..00000000 --- a/versioned_docs/version-4.7/reference/clustering/requirements-and-definitions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Requirements and Definitions ---- - -# Requirements and Definitions - -To create a cluster you must have two or more nodes\* (aka instances) of Harper running. - -\*_A node is a single instance/installation of Harper. A node of Harper can operate independently with clustering on or off._ - -On the following pages we'll walk you through the steps required, in order, to set up a Harper cluster. diff --git a/versioned_docs/version-4.7/reference/clustering/subscription-overview.md b/versioned_docs/version-4.7/reference/clustering/subscription-overview.md deleted file mode 100644 index b4827de7..00000000 --- a/versioned_docs/version-4.7/reference/clustering/subscription-overview.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Subscription Overview ---- - -# Subscription Overview - -A subscription defines how data should move between two nodes. They are exclusively table level and operate independently. They connect a table on one node to a table on another node, the subscription will apply to a matching database name and table name on both nodes. - -_Note: ‘local’ and ‘remote’ will often be referred to. In the context of these docs ‘local’ is the node that is receiving the API request to create/update a subscription and remote is the other node that is referred to in the request, the node on the other end of the subscription._ - -A subscription consists of: - -`database` - the name of the database that the table you are creating the subscription for belongs to. 
_Note, this was previously referred to as schema and may occasionally still be referenced that way._ - -`table` - the name of the table the subscription will apply to. - -`publish` - a boolean which determines if transactions on the local table should be replicated on the remote table. - -`subscribe` - a boolean which determines if transactions on the remote table should be replicated on the local table. - -#### Publish subscription - -![figure 2](/img/v4.6/clustering/figure2.png) - -This diagram is an example of a `publish` subscription from the perspective of Node1. - -The record with id 2 has been inserted in the dog table on Node1, after it has completed that insert it is sent to Node 2 and inserted in the dog table there. - -#### Subscribe subscription - -![figure 3](/img/v4.6/clustering/figure3.png) - -This diagram is an example of a `subscribe` subscription from the perspective of Node1. - -The record with id 3 has been inserted in the dog table on Node2, after it has completed that insert it is sent to Node1 and inserted there. - -#### Subscribe and Publish - -![figure 4](/img/v4.6/clustering/figure4.png) - -This diagram shows both subscribe and publish but publish is set to false. You can see that because subscribe is true the insert on Node2 is being replicated on Node1 but because publish is set to false the insert on Node1 is _**not**_ being replicated on Node2. - -![figure 5](/img/v4.6/clustering/figure5.png) - -This shows both subscribe and publish set to true. The insert on Node1 is replicated on Node2 and the update on Node2 is replicated on Node1. 
diff --git a/versioned_docs/version-4.7/reference/clustering/things-worth-knowing.md b/versioned_docs/version-4.7/reference/clustering/things-worth-knowing.md deleted file mode 100644 index f523c7bf..00000000 --- a/versioned_docs/version-4.7/reference/clustering/things-worth-knowing.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Things Worth Knowing ---- - -# Things Worth Knowing - -Additional information that will help you define your clustering topology. - ---- - -### Transactions - -Transactions that are replicated across the cluster are: - -- Insert -- Update -- Upsert -- Delete -- Bulk loads - - CSV data load - - CSV file load - - CSV URL load - - Import from S3 - -When adding or updating a node any databases and tables in the subscription that don’t exist on the remote node will be automatically created. - -**Destructive database operations do not replicate across a cluster**. Those operations include `drop_database`, `drop_table`, and `drop_attribute`. If the desired outcome is to drop database information from any nodes then the operation(s) will need to be run on each node independently. - -Users and roles are not replicated across the cluster. - ---- - -### Queueing - -Harper has built-in resiliency for when network connectivity is lost within a subscription. When connections are reestablished, a catchup routine is executed to ensure data that was missed, specific to the subscription, is sent/received as defined. - ---- - -### Topologies - -Harper clustering creates a mesh network between nodes giving end users the ability to create an infinite number of topologies. subscription topologies can be simple or as complex as needed. 
- -![](/img/v4.6/clustering/figure6.png) diff --git a/versioned_docs/version-4.7/reference/components/applications.md b/versioned_docs/version-4.7/reference/components/applications.md deleted file mode 100644 index 41210f38..00000000 --- a/versioned_docs/version-4.7/reference/components/applications.md +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: Applications ---- - -# Applications - -> The contents of this page predominantly relate to **application** components. Extensions are not necessarily _deployable_. The ambiguity of the term "components" is being worked on and will be improved in future releases. As we work to clarify the terminology, please keep in mind that the component operations are synonymous with application management. In general, "components" is the general term for both applications and extensions, but in context of the operations API it refers to applications only. - -Harper offers several approaches to managing applications that differ between local development and Harper managed instances. This page will cover the recommended methods of developing, installing, deploying, and running Harper applications. - -## Local Development - -Harper is designed to be simple to run locally. Generally, Harper should be installed locally on a machine using a global package manager install (i.e. `npm i -g harperdb`). - -> Before continuing, ensure Harper is installed and the `harperdb` CLI is available. For more information, review the [installation guide](../../deployments/install-harper/). - -When developing an application locally there are a number of ways to run it on Harper. - -### `dev` and `run` commands - -The quickest way to run an application is by using the `dev` command within the application directory. - -The `harperdb dev .` command will automatically watch for file changes within the application directory and restart the Harper threads when changes are detected. 
- -The `dev` command will **not** restart the main thread; if this is a requirement, switch to using `run` instead and manually start/stop the process to execute the main thread. - -Stop execution for either of these processes by sending a SIGINT (generally CTRL+C) signal to the process. - -### Deploying to a local Harper instance - -Alternatively, to mimic interfacing with a hosted Harper instance, use operation commands instead. - -1. Start up Harper with `harperdb` -1. _Deploy_ the application to the local instance by executing: - - ```sh - harperdb deploy \ - project= \ - package= \ - restart=true - ``` - - - Make sure to omit the `target` option so that it _deploys_ to the Harper instance running locally - - The `package=` option creates a symlink to the application simplifying restarts - - By default, the `deploy` operation command will _deploy_ the current directory by packaging it up and streaming the bytes. By specifying `package`, it skips this and references the file path directly - - The `restart=true` option automatically restarts Harper threads after the application is deployed - - If set to `'rolling'`, a rolling restart will be triggered after the application is deployed - -1. In another terminal, use the `harperdb restart` command to restart the instance's threads at any time - - With `package=`, the application source is symlinked so changes will automatically be picked up between restarts - - If `package` was omitted, run the `deploy` command again with any new changes -1. To remove the application use `harperdb drop_component project=` - -Similar to the previous section, if the main thread needs to be restarted, start and stop the Harper instance manually (with the application deployed). Upon Harper startup, the application will automatically be loaded and executed across all threads. - -> Not all [component operations](../../developers/operations-api/components) are available via CLI. 
When in doubt, switch to using the Operations API via network requests to the local Harper instance. - -For example, to properly _deploy_ a `test-application` locally, the command would look like: - -```sh -harperdb deploy \ - project=test-application \ - package=/Users/dev/test-application \ - restart=true -``` - -> If the current directory is the application directory, use a shortcut such as `package=$(pwd)` to avoid typing out the complete path. - -Keep in mind that using a local file path for `package` will only work locally; deploying to a remote instance requires a different approach. - -## Remote Management - -Managing applications on a remote Harper instance is best accomplished through [component operations](../../developers/operations-api/components), similar to using the `deploy` command locally. Before continuing, always backup critical Harper instances. Managing, deploying, and executing applications can directly impact a live system. - -Remote Harper instances work very similarly to local Harper instances. The primary application management operations still include `deploy_component`, `drop_component`, and `restart`. - -The key to remote management is specifying a remote `target` along with appropriate username/password values. These can all be specified using CLI arguments: `target`, `username`, and `password`. Alternatively, the `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` environment variables can replace the `username` and `password` arguments. 
- -All together: - -```sh -harperdb deploy \ - project= \ - package= \ - username= \ - password= \ - target= \ - restart=true \ - replicated=true -``` - -Or, using environment variables: - -```sh -export CLI_TARGET_USERNAME= -export CLI_TARGET_PASSWORD= -harperdb deploy \ - project= \ - package= \ - target= \ - restart=true \ - replicated=true -``` - -Unlike local development where `package` should be set to a local file path for symlinking and improved development experience purposes, now it has some additional options. - -A local application can be deployed to a remote instance by **omitting** the `package` field. Harper will automatically package the local directory and include that along with the rest of the deployment operation. - -Furthermore, the `package` field can be set to any valid [npm dependency value](https://docs.npmjs.com/cli/v11/configuring-npm/package-json#dependencies). - -- For applications deployed to npm, specify the package name: `package="@harperdb/status-check"` -- For applications on GitHub, specify the URL: `package="https://github.com/HarperDB/status-check"`, or the shorthand `package=HarperDB/status-check` -- Private repositories also work if the correct SSH keys are on the server: `package="git+ssh://git@github.com:HarperDB/secret-applications.git"` - - Reference the [SSH Key](../../developers/operations-api/components#add-ssh-key) operations for more information on managing SSH keys on a remote instance -- Even tarball URLs are supported: `package="https://example.com/application.tar.gz"` - -> When using git tags, we highly recommend that you use the semver directive to ensure consistent and reliable installation by npm. In addition to tags, you can also reference branches or commit numbers. - -These `package` values are all supported because behind-the-scenes, Harper is generating a `package.json` file for the components. Then, it uses a form of `npm install` to resolve them as dependencies. 
This is why symlinks are generated when specifying a file path locally. The following [Advanced](#advanced) section explores this pattern in more detail. - -Finally, don't forget to include `restart=true`, or run `harperdb restart target=`. - -## Dependency Management - -Naturally, applications may have dependencies. Since we operate on top of Node.js, we default to leveraging `npm` and `package.json` for dependency management. - -As already covered, there are a number of ways to run an application on Harper. From symlinking to a local directory, to deploying it via the `deploy_component` operation. Harper does its best to seamlessly run your application. - -During application loading, if an application directory contains a `node_modules` directory or it excludes a `package.json`, Harper will skip dependency installation. Otherwise, Harper will check the application's config (values specified in the `harperdb-config.yaml` file) for `install: { command, timeout }` fields (see the example below for more information). If it exists, Harper will use the specified command to install dependencies. If not, then Harper will attempt to derive the package manager from the [`package.json#devEngines#packageManager`](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#devengines) field (which can specify an npm alternate like yarn or pnpm). Finally, if no package manager or install command could be derived, Harper will default to using `npm install`. - -The Application operations [`add_component`](../../developers/operations-api/components.md#add-component) and [`deploy_component`](../../developers/operations-api/components.md#deploy-component) support customizing the install command (and timeout) through the `install_command` and `install_timeout` fields. - -If you plan to use an alternative package manager than `npm`, ensure it is installed and configured on the host machine. 
Harper does not currently support the `"onFail": "download"` option in `package.json#devEngines#packageManager` and will fall back to `"onFail": "error"` behavior. - -### Example `harperdb-config.yaml` - -```yaml -myApp: - package: ./my-app - install: - command: yarn install - timeout: 600000 # 10 minutes -``` - -### Example `package.json` - -```json -{ - "name": "my-app", - "version": "1.0.0", - "devEngines": { - "packageManager": { - "name": "pnpm", - "onFail": "error" - } - } -} -``` - -## Advanced - -The following methods are advanced and should be executed with caution as they can have unintended side-effects. Always backup any critical Harper instances before continuing. - -First, locate the Harper installation `rootPath` directory. Generally, this is `~/hdb`. It can be retrieved by running `harperdb get_configuration` and looking for the `rootPath` field. - -> For a useful shortcut on POSIX compliant machines run: `harperdb get_configuration json=true | jq ".rootPath" | sed 's/"//g'` - -This path is the Harper instance. Within this directory, locate the root config titled `harperdb-config.yaml`, and the components root path. The components root path will be `/components` by default (thus, `~/hdb/components`), but it can also be configured. If necessary, use `harperdb get_configuration` again and look for the `componentsRoot` field for the exact path. - -### Adding components to root - -Similar to how components can specify other components within their `config.yaml`, applications can be added to Harper by adding them to the `harperdb-config.yaml`. - -The configuration is very similar to that of `config.yaml`. Entries are comprised of a top-level `:`, and an indented `package: ` field. Any additional component options can also be included as indented fields. 
- -```yaml -status-check: - package: '@harperdb/status-check' -``` - -The key difference between this and a component's `config.yaml` is that the name does **not** need to be associated with a `package.json` dependency. When Harper starts up, it transforms these configurations into a `package.json` file, and then executes a form of `npm install`. Thus, the `package: ` can be any valid dependency syntax such as npm packages, GitHub repos, tarballs, and local directories are all supported. - -Given a root config like: - -```yaml -myGithubComponent: - package: HarperDB-Add-Ons/package#v2.2.0 # install from GitHub -myNPMComponent: - package: harperdb # install from npm -myTarBall: - package: /Users/harper/cool-component.tar # install from tarball -myLocal: - package: /Users/harper/local # install from local path -myWebsite: - package: https://harperdb-component # install from URL -``` - -Harper will generate a `package.json` like: - -```json -{ - "dependencies": { - "myGithubComponent": "github:HarperDB-Add-Ons/package#v2.2.0", - "myNPMComponent": "npm:harperdb", - "myTarBall": "file://Users/harper/cool-component.tar", - "myLocal": "file://Users/harper/local", - "myWebsite": "https://harperdb-component" - } -} -``` - -npm will install all the components and store them in ``. A symlink back to `/node_modules` is also created for dependency resolution purposes. - -The package prefix is automatically added, however you can manually set it in your package reference. - -```yaml -myCoolComponent: - package: file://Users/harper/cool-component.tar -``` - -By specifying a file path, npm will generate a symlink and then changes will be automatically picked up between restarts. 
diff --git a/versioned_docs/version-4.7/reference/components/built-in-extensions.md b/versioned_docs/version-4.7/reference/components/built-in-extensions.md deleted file mode 100644 index 81cb456e..00000000 --- a/versioned_docs/version-4.7/reference/components/built-in-extensions.md +++ /dev/null @@ -1,319 +0,0 @@ ---- -title: Built-In Extensions ---- - -# Built-In Extensions - -Harper provides extended features using built-in extensions. They do **not** need to be installed with a package manager, and simply must be specified in a config to run. These are used throughout many Harper docs, guides, and examples. Unlike custom extensions which have their own semantic versions, built-in extensions follow Harper's semantic version. - -For more information read the [Components, Applications, and Extensions](../../developers/applications/) documentation section. - -- [Built-In Extensions](#built-in-extensions) - - [dataLoader](#dataloader) - - [fastifyRoutes](#fastifyroutes) - - [graphql](#graphql) - - [graphqlSchema](#graphqlschema) - - [jsResource](#jsresource) - - [loadEnv](#loadenv) - - [rest](#rest) - - [roles](#roles) - - [static](#static) - - [Options](#options) - - [Examples](#examples) - - [Basic Static File Serving](#basic-static-file-serving) - - [Enable automatic `index.html` serving](#enable-automatic-indexhtml-serving) - - [Enable automatic `.html` extension matching](#enable-automatic-html-extension-matching) - - [Provide a custom `404 Not Found` page](#provide-a-custom-404-not-found-page) - - [Fully customize not found response](#fully-customize-not-found-response) - -## dataLoader - -Load data from JSON or YAML files into Harper tables as part of component deployment. - -This component is an [Extension](..#extensions) and can be configured with the `files` configuration option. 
- -Complete documentation for this feature is available here: [Data Loader](../../developers/applications/data-loader) - -```yaml -dataLoader: - files: 'data/*.json' -``` - -## fastifyRoutes - -Specify custom endpoints using [Fastify](https://fastify.dev/). - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Define Fastify Routes](../../developers/applications/define-routes) - -```yaml -fastifyRoutes: - files: 'routes/*.js' -``` - -## graphql - -> GraphQL querying is **experimental**, and only partially implements the GraphQL Over HTTP / GraphQL specifications. - -Enables GraphQL querying via a `/graphql` endpoint loosely implementing the GraphQL Over HTTP specification. - -Complete documentation for this feature is available here: [GraphQL](../graphql) - -```yaml -graphql: true -``` - -## graphqlSchema - -Specify schemas for Harper tables and resources via GraphQL schema syntax. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Schemas](../../developers/applications/defining-schemas) - -```yaml -graphqlSchema: - files: 'schemas.graphql' -``` - -## jsResource - -Specify custom, JavaScript based Harper resources. - -Refer to the Application [Custom Functionality with JavaScript](../../developers/applications/#custom-functionality-with-javascript) guide, or [Resource Class](../resources/) reference documentation for more information on custom resources. 
- -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -```yaml -jsResource: - files: 'resource.js' -``` - -## loadEnv - -Load environment variables via files like `.env`. - -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Ensure this component is specified first in `config.yaml` so that environment variables are loaded prior to loading any other components. - -```yaml -loadEnv: - files: '.env' -``` - -This component matches the default behavior of dotenv where existing variables take precedence. Specify the `override` option in order to override existing environment variables assigned to `process.env`: - -```yaml -loadEnv: - files: '.env' - override: true -``` - -> Important: Harper is a single process application. Environment variables are loaded onto `process.env` and will be shared throughout all Harper components. This means environment variables loaded by one component will be available on other components (as long as the components are loaded in the correct order). - - - - - - - - - -## rest - -Enable automatic REST endpoint generation for exported resources with this component. - -Complete documentation for this feature is available here: [REST](../../developers/rest) - -```yaml -rest: true -``` - -This component contains additional options: - -To enable `Last-Modified` header support: - -```yaml -rest: - lastModified: true -``` - -To disable automatic WebSocket support: - -```yaml -rest: - webSocket: false -``` - -## roles - -Specify roles for Harper tables and resources. 
- -This component is a [Resource Extension](./extensions#resource-extension) and can be configured with the [`files` and `urlPath`](./extensions#resource-extension-configuration) configuration options. - -Complete documentation for this feature is available here: [Defining Roles](../../developers/applications/defining-roles) - -```yaml -roles: - files: 'roles.yaml' -``` - -## static - -Serve static files via HTTP. - -Use the [Resource Extension](./extensions#resource-extension) configuration options [`files` and `urlPath`](./extensions#resource-extension-configuration) to specify the files to be served. - -``` -my-app/ -├─ site/ -│ ├─ index.html -│ ├─ about.html -│ ├─ blog/ -│ ├─ post-1.html -│ ├─ post-2.html -├─ config.yaml -``` - -The `static` plugin can be configured to serve the `site/` directory by specifying: - -```yaml -static: - files: 'site/**' -``` - -Then you could access the files relative to the `site` directory, thus `GET localhost:9926/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/blog/post-1.html` would return the contents of `site/blog/post-1.html`. - -You can use the `urlPath` option to serve the files from a different URL path, for example: - -```yaml -static: - files: 'site/**' - urlPath: 'app' -``` - -Now, `GET localhost:9926/app/index.html` would return the contents of `site/index.html`, and `GET localhost:9926/app/blog/post-1.html` would return the contents of `site/blog/post-1.html`. - -Moreover, if the `site/` directory was nested another level, such as: - -``` -my-app/ -├─ site/ -│ ├─ pages/ -│ ├─ index.html -│ ├─ about.html -│ ├─ blog/ -│ ├─ post-1.html -│ ├─ post-2.html -│ ├─ cache-info/ -│ ├─ index.json -│ ├─ about.json -│ ├─ ... 
-├─ config.yaml -``` - -Now a pattern such as `site/pages/**` will match all files within the `pages` directory (including subdirectories) so a request to `GET localhost:9926/index.html` will return the contents of `site/pages/index.html`, and `GET localhost:9926/blog/post-1.html` will return the contents of `site/pages/blog/post-1.html`. - -Because this plugin is implemented using the new [Plugin API](./plugins.md), it automatically updates to application changes. From updating the `config.yaml` to adding, removing, or modifying files, everything is handled automatically and Harper should **not** require a restart. - -### Options - -In addition to the general Plugin configuration options (`files`, `urlPath`, and `timeout`), this plugin supports the following configuration options: - -- `extensions` - `string[]` - _optional_ - An array of file extensions to try and serve when an exact path is not found. For example, `['html']` and the path `/site/page-1` will match `/site/page-1.html`. -- `fallthrough` - `boolean` - _optional_ - If `true`, the plugin will fall through to the next handler if the requested file is not found. Make sure to disable this option if you want to customize the 404 Not Found response with the `notFound` option. Defaults to `true`. -- `index` - `boolean` - _optional_ - If `true`, the plugin will serve an `index.html` file if it exists in the directory specified by the `files` pattern. Defaults to `false`. -- `notFound` - `string | { file: string; statusCode: number }` - _optional_ - Specify a custom file to be returned for 404 Not Found responses. If you want to specify a different statusCode when a given path cannot be found, use the object form and specify the `file` and `statusCode` properties (this is particularly useful for SPAs). - -### Examples - -The `static` plugin can be configured in various ways to provide different behaviors. 
Here are some common examples: - -#### Basic Static File Serving - -Serve all files contained within the `static/` directory as is. - -```yaml -static: - files: 'static/**' -``` - -Requests must match the file names exactly (relative to the `static/` directory). - -#### Enable automatic `index.html` serving - -Serve all files contained within the `static/` directory, and automatically serve an `index.html` file if it exists in the directory. - -```yaml -static: - files: 'static/**' - index: true -``` - -Now given a directory structure like: - -``` -my-app/ -├─ static/ -│ ├─ index.html -│ ├─ blog/ -│ ├─ index.html -│ ├─ post-1.html -``` - -Requests would map like: - -``` -GET / -> static/index.html -GET /blog -> static/blog/index.html -GET /blog/post-1.html -> static/blog/post-1.html -``` - -#### Enable automatic `.html` extension matching - -Expanding on the previous example, if you specify the `extensions` option, the plugin will automatically try to match the requested path with the specified extensions. - -```yaml -static: - files: 'static/**' - index: true - extensions: ['html'] -``` - -Now with the same directory structure, requests would map like: - -``` -GET / -> static/index.html -GET /blog -> static/blog/index.html -GET /blog/post-1 -> static/blog/post-1.html -``` - -#### Provide a custom `404 Not Found` page - -Sometimes when a `404 Not Found` response is not sufficient, and you want to provide a custom page or resource, you can use the `notFound` option to specify a custom file to be returned when a requested path is not found. - -```yaml -static: - files: 'static/**' - notFound: 'static/404.html' -``` - -Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/404.html` with a `404` status code. - -#### Fully customize not found response - -Most common in SPAs relying on client-side routing, you may want to override the default `404` status code when a path is not found. 
- -You can do this by specifying the `notFound` option as an object with a `file` and `statusCode` property. - -```yaml -static: - files: 'static/**' - notFound: - file: 'static/index.html' - statusCode: 200 -``` - -Now if a request is made to a path that does not exist, such as `/non-existent`, the plugin will return the contents of `static/index.html` with a `200` status code. This is particularly useful for SPAs where you want to serve the main application file regardless of the requested path. diff --git a/versioned_docs/version-4.7/reference/components/configuration.md deleted file mode 100644 index 2175a03d..00000000 --- a/versioned_docs/version-4.7/reference/components/configuration.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Component Configuration ---- - -# Component Configuration - -> For information on the distinction between the types of components (applications and extensions), refer to the beginning of the [Applications](../../developers/applications) documentation section. - -Harper components are configured with a `config.yaml` file located in the root of the component module directory. This file is how a component configures other components it depends on. Each entry in the file starts with a component name, and then configuration values are indented below it. - -```yaml -name: - option-1: value - option-2: value -``` - -It is the entry's `name` that is used for component resolution. It can be one of the [built-in extensions](./built-in-extensions), or it must match a package dependency of the component as specified by `package.json`. The [Custom Component Configuration](#custom-component-configuration) section provides more details and examples. 
- -For some built-in extensions they can be configured with as little as a top-level boolean; for example, the [rest](./built-in-extensions#rest) extension can be enabled with just: - -```yaml -rest: true -``` - -Most components generally have more configuration options. Some options are ubiquitous to the Harper platform, such as the `files` and `urlPath` options for an [extension](./extensions) or [plugin](./plugins), or `package` for any [custom component](#custom-component-configuration). - -[Extensions](./extensions) and [plugins](./plugins) require specifying the `extensionModule` or `pluginModule` option respectively. Refer to their respective API reference documentation for more information. - -## Custom Component Configuration - -Any custom component **must** be configured with the `package` option in order for Harper to load that component. When enabled, the name of package must match a dependency of the component. For example, to use the `@harperdb/nextjs` extension, it must first be included in `package.json`: - -```json -{ - "dependencies": { - "@harperdb/nextjs": "1.0.0" - } -} -``` - -Then, within `config.yaml` it can be enabled and configured using: - -```yaml -'@harperdb/nextjs': - package: '@harperdb/nextjs' - # ... -``` - -Since npm allows for a [variety of dependency configurations](https://docs.npmjs.com/cli/configuring-npm/package-json#dependencies), this can be used to create custom references. For example, to depend on a specific GitHub branch, first update the `package.json`: - -```json -{ - "dependencies": { - "harper-nextjs-test-feature": "HarperDB/nextjs#test-feature" - } -} -``` - -And now in `config.yaml`: - -```yaml -harper-nextjs-test-feature: - package: '@harperdb/nextjs' - files: './' - # ... -``` - -## Default Component Configuration - -Harper components do not need to specify a `config.yaml`. Harper uses the following default configuration to load components. 
-
-```yaml
-rest: true
-graphqlSchema:
-  files: '*.graphql'
-roles:
-  files: 'roles.yaml'
-jsResource:
-  files: 'resources.js'
-fastifyRoutes:
-  files: 'routes/*.js'
-  urlPath: '.'
-static:
-  files: 'web/**'
-```
-
-Refer to the [built-in components](./built-in-extensions) documentation for more information on these fields.
-
-If a `config.yaml` is defined, it will **not** be merged with the default config.
diff --git a/versioned_docs/version-4.7/reference/components/extensions.md b/versioned_docs/version-4.7/reference/components/extensions.md
deleted file mode 100644
index 78012b7b..00000000
--- a/versioned_docs/version-4.7/reference/components/extensions.md
+++ /dev/null
@@ -1,187 +0,0 @@
----
-title: Extensions API
----
-
-# Extensions API
-
-> As of Harper v4.6, a new iteration of the extension API was released called **Plugins**. They are simultaneously a simplification and an extensibility upgrade. Plugins are **experimental**, but we encourage developers to consider developing with the [plugin API](./plugins) instead of the extension API. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported.
-
-There are two key types of Extensions: **Resource Extensions** and **Protocol Extensions**. The key difference is that a **Protocol Extension** can return a **Resource Extension**.
-
-Furthermore, what defines an extension separately from a component is that it leverages any of the [Resource Extension](#resource-extension-api) or [Protocol Extension](#protocol-extension-api) APIs.
-
-All extensions must define a `config.yaml` file and declare an `extensionModule` option. This must be a path to the extension module source code. The path must resolve from the root of the module directory.
-
-For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) `config.yaml` specifies `extensionModule: ./extension.js`.
- -If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e. `extensionModule: ./dist/index.js`) - -## Resource Extension - -A Resource Extension is for processing a certain type of file or directory. For example, the built-in [jsResource](./built-in-extensions#jsresource) extension handles executing JavaScript files. - -Resource Extensions are comprised of four distinct function exports, [`handleFile()`](#handlefilecontents-urlpath-absolutepath-resources-void--promisevoid), [`handleDirectory()`](#handledirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void), [`setupFile()`](#setupfilecontents-urlpath-absolutepath-resources-void--promisevoid), and [`setupDirectory()`](#setupdirectoryurlpath-absolutepath-resources-boolean--void--promiseboolean--void). The `handleFile()` and `handleDirectory()` methods are executed on **all worker threads**, and are _executed again during restarts_. The `setupFile()` and `setupDirectory()` methods are only executed **once** on the **main thread** during the initial system start sequence. - -> Keep in mind that the CLI command `harperdb restart` or CLI argument `restart=true` only restarts the worker threads. If a component is deployed using `harperdb deploy`, the code within the `setupFile()` and `setupDirectory()` methods will not be executed until the system is completely shutdown and turned back on. - -Other than their execution behavior, the `handleFile()` and `setupFile()` methods, and `handleDirectory()` and `setupDirectory()` methods have identical function definitions (arguments and return value behavior). - -### Resource Extension Configuration - -Any [Resource Extension](#resource-extension) can be configured with the `files` and `urlPath` options. 
These options control how _files_ and _directories_ are resolved in order to be passed to the extension's `handleFile()`, `setupFile()`, `handleDirectory()`, and `setupDirectory()` methods. - -> Harper relies on the [fast-glob](https://github.com/mrmlnc/fast-glob) library for glob pattern matching. - -- `files` - `string | string[] | Object` - _required_ - A [glob pattern](https://github.com/mrmlnc/fast-glob?tab=readme-ov-file#pattern-syntax) string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the extension. If specified as an object, the `source` property is required. By default, Harper **matches files and directories**; this is configurable using the `only` option. - - `source` - `string | string[]` - _required_ - The glob pattern string or array of strings. - - `only` - `'all' | 'files' | 'directories'` - _optional_ - The glob pattern will match only the specified entry type. Defaults to `'all'`. - - `ignore` - `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - - If the value starts with `./`, such as `'./static/'`, the component name will be included in the base url path - - If the value is `.`, then the component name will be the base url path - - Note: `..` is an invalid pattern and will result in an error - - Otherwise, the value here will be base url path. 
Leading and trailing `/` characters will be handled automatically (`/static/`, `/static`, and `static/` are all equivalent to `static`) - -For example, to configure the [static](./built-in-extensions#static) component to serve all HTML files from the `web` source directory on the `static` URL endpoint: - -```yaml -static: - files: 'web/*.html' - urlPath: 'static' -``` - -If there are files such as `web/index.html` and `web/blog.html`, they would be available at `localhost/static/index.html` and `localhost/static/blog.html` respectively. - -Furthermore, if the component is located in the `test-component` directory, and the `urlPath` was set to `'./static/'` instead, then the files would be served from `localhost/test-component/static/*` instead. - -The `urlPath` is optional, for example to configure the [graphqlSchema](./built-in-extensions#graphqlschema) component to load all schemas within the `src/schema` directory, only specifying a `files` glob pattern is required: - -```yaml -graphqlSchema: - files: 'src/schema/*.schema' -``` - -The `files` option also supports a more complex options object. These additional fields enable finer control of the glob pattern matching. - -For example, to match files within `web`, and omit any within the `web/images` directory, the configuration could be: - -```yaml -static: - files: - source: 'web/**/*' - ignore: ['web/images'] -``` - -In order to match only files: - -```yaml -test-component: - files: - source: 'dir/**/*' - only: 'files' -``` - -### Resource Extension API - -In order for an extension to be classified as a Resource Extension it must implement at least one of the `handleFile()`, `handleDirectory()`, `setupFile()`, or `setupDirectory()` methods. As a standalone extension, these methods should be named and exported directly. 
For example:
-
-```js
-// ESM
-export function handleFile() {}
-export function setupDirectory() {}
-
-// or CJS
-function handleDirectory() {}
-function setupFile() {}
-
-module.exports = { handleDirectory, setupFile };
-```
-
-When returned by a [Protocol Extension](#protocol-extension), these methods should be defined on the object instead:
-
-```js
-export function start() {
-	return {
-		handleFile() {},
-	};
-}
-```
-
-#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise`
-
-#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise`
-
-These methods are for processing individual files. They can be async.
-
-> Remember!
->
-> `setupFile()` is executed **once** on the **main thread** during the main start sequence.
->
-> `handleFile()` is executed on **worker threads** and is executed again during restarts.
-
-Parameters:
-
-- `contents` - `Buffer` - The contents of the file
-- `urlPath` - `string` - The recommended URL path of the file
-- `absolutePath` - `string` - The absolute path of the file
-
-- `resources` - `Object` - A collection of the currently loaded resources
-
-Returns: `void | Promise`
-
-#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise`
-
-#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise`
-
-These methods are for processing directories. They can be async.
-
-If the function returns or resolves a truthy value, then the component loading sequence will end and no other entries within the directory will be processed.
-
-> Remember!
->
-> `setupDirectory()` is executed **once** on the **main thread** during the main start sequence.
->
-> `handleDirectory()` is executed on **worker threads** and is executed again during restarts.
-
-Parameters:
-
-- `urlPath` - `string` - The recommended URL path of the directory
-- `absolutePath` - `string` - The absolute path of the directory
-
-- `resources` - `Object` - A collection of the currently loaded resources
-
-Returns: `boolean | void | Promise`
-
-## Protocol Extension
-
-A Protocol Extension is a more advanced form of a Resource Extension and is mainly used for implementing higher level protocols. For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs) handles building and running a Next.js project. A Protocol Extension is particularly useful for adding custom networking handlers (see the [`server`](../globals#server) global API documentation for more information).
-
-### Protocol Extension Configuration
-
-In addition to the `files` and `urlPath` [Resource Extension configuration](#resource-extension-configuration) options, and the `package` [Custom Component configuration](#custom-component-configuration) option, Protocol Extensions can also specify additional configuration options. Any options added to the extension configuration (in `config.yaml`), will be passed through to the `options` object of the `start()` and `startOnMainThread()` methods.
-
-For example, the [Harper Next.js Extension](https://github.com/HarperDB/nextjs#options) specifies multiple options that can be included in its configuration. For example, a Next.js app using `@harperdb/nextjs` may specify the following `config.yaml`:
-
-```yaml
-'@harperdb/nextjs':
-  package: '@harperdb/nextjs'
-  files: './'
-  prebuilt: true
-  dev: false
-```
-
-Many protocol extensions will use the `port` and `securePort` options for configuring networking handlers. Many of the [`server`](../globals#server) global APIs accept `port` and `securePort` options, so components replicate this for simpler pass-through.
- -### Protocol Extension API - -A Protocol Extension is made up of two distinct methods, [`start()`](#startoptions-resourceextension--promiseresourceextension) and [`startOnMainThread()`](#startonmainthreadoptions-resourceextension--promiseresourceextension). Similar to a Resource Extension, the `start()` method is executed on _all worker threads_, and _executed again on restarts_. The `startOnMainThread()` method is **only** executed **once** during the initial system start sequence. These methods have identical `options` object parameter, and can both return a Resource Extension (i.e. an object containing one or more of the methods listed above). - -#### `start(options): ResourceExtension | Promise` - -#### `startOnMainThread(options): ResourceExtension | Promise` - -Parameters: - -- `options` - `Object` - An object representation of the extension's configuration options. - -Returns: `Object` - An object that implements any of the [Resource Extension APIs](#resource-extension-api) diff --git a/versioned_docs/version-4.7/reference/components/index.md b/versioned_docs/version-4.7/reference/components/index.md deleted file mode 100644 index 30ce276d..00000000 --- a/versioned_docs/version-4.7/reference/components/index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Components ---- - -# Components - -**Components** are the high-level concept for modules that extend the Harper core platform adding additional functionality. Components encapsulate both applications and extensions. - -> We are actively working to disambiguate the terminology. When you see "component", such as in the Operations API or CLI, it generally refers to an application. We will do our best to clarify exactly which classification of a component whenever possible. - -**Applications** are best defined as the implementation of a specific user-facing feature or functionality. Applications are built on top of extensions and can be thought of as the end product that users interact with. 
For example, a Next.js application that serves a web interface or an Apollo GraphQL server that provides a GraphQL API are both applications. - -**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality the application is implementing. For example, the built-in `graphqlSchema` extension enables applications to define their databases and tables using GraphQL schemas. Furthermore, the `@harperdb/nextjs` and `@harperdb/apollo` extensions are the building blocks that provide support for building Next.js and Apollo applications. - -> As of Harper v4.6, a new, **experimental** component system has been introduced called **plugins**. Plugins are a **new iteration of the existing extension system**. They are simultaneously a simplification and an extensibility upgrade. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only have to define a single `handleApplication` method. Plugins are **experimental**, and complete documentation is available on the [plugin API](components/plugins) page. In time we plan to deprecate the concept of extensions in favor of plugins, but for now, both are supported. - -All together, the support for implementing a feature is the extension, and the actual implementation of the feature is the application. - -For more information on the differences between applications and extensions, refer to the beginning of the [Applications](../developers/applications/) guide documentation section. 
-
-This technical reference section has detailed information on various component systems:
-
-- [Built-In Extensions](components/built-in-extensions)
-- [Configuration](components/configuration)
-- [Managing Applications](components/applications)
-- [Extensions](components/extensions)
-- [(Experimental) Plugins](components/plugins)
-
-## Custom Applications
-
-- [`@harperdb/status-check`](https://github.com/HarperDB/status-check)
-- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter)
-- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect)
-
-## Custom Extensions
-
-- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs)
-- [`@harperdb/apollo`](https://github.com/HarperDB/apollo)
-- [`@harperdb/astro`](https://github.com/HarperDB/astro)
diff --git a/versioned_docs/version-4.7/reference/components/plugins.md b/versioned_docs/version-4.7/reference/components/plugins.md
deleted file mode 100644
index 7ce1d3c3..00000000
--- a/versioned_docs/version-4.7/reference/components/plugins.md
+++ /dev/null
@@ -1,629 +0,0 @@
----
-title: Experimental Plugins
----
-
-# Experimental Plugins
-
-The new, experimental **plugin** API is an iteration of the existing extension system. It simplifies the API by removing the need for multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, etc.) and instead only requires a single `handleApplication` method. Plugins are designed to be more extensible and easier to use, and they are intended to replace the concept of extensions in the future.
-
-Similar to the existing extension API, a plugin must specify a `pluginModule` option within `config.yaml`. This must be a path to the plugin module source code. The path must resolve from the root of the module directory. For example: `pluginModule: plugin.js`.
-
-If the plugin is being written in something other than JavaScript (such as TypeScript), ensure that the path resolves to the built version, (i.e.
`pluginModule: ./dist/index.js`) - -It is also recommended that all extensions have a `package.json` that specifies JavaScript package metadata such as name, version, type, etc. Since plugins are just JavaScript packages, they can do anything a JavaScript package can normally do. It can be written in TypeScript, and compiled to JavaScript. It can export an executable (using the [bin](https://docs.npmjs.com/cli/configuring-npm/package-json#bin) property). It can be published to npm. The possibilities are endless! - -The key to a plugin is the [`handleApplication()`](#function-handleapplicationscope-scope-void--promisevoid) method. It must be exported by the `pluginModule`, and cannot coexist with any of the other extension methods such as `start`, `handleFile`, etc. The component loader will throw an error if both are defined. - -The `handleApplication()` method is executed **sequentially** across all **worker threads** during the component loading sequence. It receives a single, `scope` argument that contains all of the relevant metadata and APIs for interacting with the associated component. - -The method can be async and it is awaited by the component loader. - -However, it is highly recommended to avoid event-loop-blocking operations within the `handleApplication()` method. See the examples section for best practices on how to use the `scope` argument effectively. - -## Configuration - -As plugins are meant to be used by applications in order to implement some feature, many plugins provide a variety of configuration options to customize their behavior. Some plugins even require certain configuration options to be set in order to function properly. 
- -As a brief overview, the general configuration options available for plugins are: - -- `files` - `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - _optional_ - A glob pattern string or array of strings that specifies the files and directories to be handled by the plugin's default `EntryHandler` instance. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries handled by the plugin's default `EntryHandler` instance. -- `timeout` - `number` - _optional_ - The timeout in milliseconds for the plugin's operations. If not specified, the system default is **30 seconds**. Plugins may override the system default themselves, but this configuration option is the highest priority and takes precedence. - -### File Entries - -Just like extensions, plugins support the `files` and `urlPath` options for file entry matching. The values specified for these options are used for the default `EntryHandler` instance created by the `scope.handleEntry()` method. As the reference documentation details, similar options can be used to create custom `EntryHandler` instances too. - -The `files` option can be a glob pattern string, an array of glob pattern strings, or a more expressive glob options object. - -- The patterns **cannot** contain `..` or start with `/`. -- The pattern `.` or `./` is transformed into `**/*` automatically. -- Often, it is best to omit a leading `.` or `./` in the glob pattern. - -The `urlPath` option is a base URL path that is prepended to the resolved `files` entries. - -- It **cannot** contain `..`. -- If it starts with `./` or is just `.`, the name of the plugin will be automatically prepended to it. 
- -Putting this all together, to configure the [static](./built-in-extensions#static) built-in extension to serve files from the `web` directory but at the `/static/` path, the `config.yaml` would look like this: - -```yaml -static: - files: 'web/**/*' - urlPath: '/static/' -``` - -Keep in mind the `urlPath` option is completely optional. - -As another example, to configure the [graphqlSchema](./built-in-extensions#graphqlschema) built-in extension to serve only `*.graphql` files from within the top-level of the `src/schema` directory, the `config.yaml` would look like this: - -```yaml -graphqlSchema: - files: 'src/schema/*.graphql' -``` - -As detailed, the `files` option also supports a more complex object syntax for advanced use cases. - -For example, to match files within the `web` directory, and omit any within `web/images`, you can use a configuration such as: - -```yaml -static: - files: - source: 'web/**/*' - ignore: 'web/images/**' -``` - -> If you're transitioning from the [extension](./extensions) system, the `files` option object no longer supports an `only` field. Instead, use the `entryEvent.entryType` or the specific `entryEvent.eventType` fields in [`onEntryEventHandler(entryEvent)`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) method or any of the specific [`EntryHandler`](#class-entryhandler) events. - -### Timeouts - -The default timeout for all plugins is **30 seconds**. If the method does not complete within this time, the component loader will throw an error and unblock the component loading sequence. This is to prevent the component loader from hanging indefinitely if a plugin fails to respond or takes too long to execute. - -The plugin module can export a `defaultTimeout` variable (in milliseconds) that will override the system default. 
- -For example: - -```typescript -export const defaultTimeout = 60_000; // 60 seconds -``` - -Additionally, users can specify a `timeout` option in their application's `config.yaml` file for a specific plugin. This option takes precedence over the plugin's `defaultTimeout` and the system default. - -For example: - -```yaml -customPlugin: - package: '@harperdb/custom-plugin' - files: 'foo.js' - timeout: 45_000 # 45 seconds -``` - -## Example: Statically hosting files - -This is a functional example of how the `handleApplication()` method and `scope` argument can be used to create a simple static file server plugin. This example assumes that the component has a `config.yaml` with the `files` option set to a glob pattern that matches the files to be served. - -> This is a simplified form of the [static](./built-in-extensions#static) built-in extension. - -```js -export function handleApplication(scope) { - const staticFiles = new Map(); - - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files' || key[0] === 'urlPath') { - // If the files or urlPath options change, we need to reinitialize the static files map - staticFiles.clear(); - logger.info(`Static files reinitialized due to change in ${key.join('.')}`); - } - }); - - scope.handleEntry((entry) => { - if (entry.entryType === 'directory') { - logger.info(`Cannot serve directories. Update the files option to only match files.`); - return; - } - - switch (entry.eventType) { - case 'add': - case 'change': - // Store / Update the file contents in memory for serving - staticFiles.set(entry.urlPath, entry.contents); - break; - case 'unlink': - // Remove the file from memory when it is deleted - staticFiles.delete(entry.urlPath); - break; - } - }); - - scope.server.http( - (req, next) => { - if (req.method !== 'GET') return next(req); - - // Attempt to retrieve the requested static file from memory - const staticFile = staticFiles.get(req.pathname); - - return staticFile - ? 
{ - statusCode: 200, - body: staticFile, - } - : { - statusCode: 404, - body: 'File not found', - }; - }, - { runFirst: true } - ); -} -``` - -In this example, the entry handler method passed to `handleEntry` will manage the map of static files in memory using their computed `urlPath` and the `contents`. If the config file changes (and thus a new default file or url path is specified) the plugin will clear the file map as well to remove artifacts. Furthermore, it uses the `server.http()` middleware to hook into the HTTP request handling. - -This example is heavily simplified, but it demonstrates how the different key parts of `scope` can be used together to provide a performant and reactive application experience. - -## API - -### TypeScript support - -The classes and types referenced below are all exported by the `harperdb` package. Just import the ones you need like this: - -```typescript -import { Scope, type Config } from 'harperdb'; -``` - -### Function: `handleApplication(scope: Scope): void | Promise` - -Parameters: - -- `scope` - [`Scope`](#class-scope) - An instance of the `Scope` class that provides access to the relative application's configuration, resources, and other APIs. - -Returns: `void | Promise` - -This is the only method a plugin module must export. It can be async and is awaited by the component loader. The `scope` argument provides access to the relative application's configuration, resources, and other APIs. - -### Class: `Scope` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -#### Event: `'close'` - -Emitted after the scope is closed via the `close()` method. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -#### Event: `'ready'` - -Emitted when the Scope is ready to be used after loading the associated config file. It is awaited by the component loader, so it is not necessary to await it within the `handleApplication()` method. 
- -#### `scope.close()` - -Returns: `this` - The current `Scope` instance. - -Closes all associated entry handlers, the associated `scope.options` instance, emits the `'close'` event, and then removes all other listeners on the instance. - -#### `scope.handleEntry([files][, handler])` - -Parameters: - -- `files` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) | [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ -- `handler` - [`onEntryEventHandler`](#function-onentryeventhandlerentryevent-fileentryevent--directoryentryevent-void) - _optional_ - -Returns: [`EntryHandler`](#class-entryhandler) - An instance of the `EntryHandler` class that can be used to handle entries within the scope. - -The `handleEntry()` method is the key to handling file system entries specified by a `files` glob pattern option in `config.yaml`. This method is used to register an entry event handler, specifically for the `EntryHandler` [`'all'`](#event-all) event. The method signature is very flexible, and allows for the following variations: - -- `scope.handleEntry()` (with no arguments) Returns the default `EntryHandler` created by the `files` and `urlPath` options in the `config.yaml`. -- `scope.handleEntry(handler)` (where `handler` is an `onEntryEventHandler`) Returns the default `EntryHandler` instance (based on the options within `config.yaml`) and uses the provided `handler` for the [`'all'`](#event-all) event. -- `scope.handleEntry(files)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`) Returns a new `EntryHandler` instance that handles the specified `files` configuration. 
-- `scope.handleEntry(files, handler)` (where `files` is `FilesOptions` or `FileAndURLPathConfig`, and `handler` is an `onEntryEventHandler`) Returns a new `EntryHandler` instance that handles the specified `files` configuration and uses the provided `handler` for the [`'all'`](#event-all) event. - -For example: - -```js -export function handleApplication(scope) { - // Get the default EntryHandler instance - const defaultEntryHandler = scope.handleEntry(); - - // Assign a handler for the 'all' event on the default EntryHandler - scope.handleEntry((entry) => { - /* ... */ - }); - - // Create a new EntryHandler for the 'src/**/*.js' files option with a custom `'all'` event handler. - const customEntryHandler = scope.handleEntry( - { - files: 'src/**/*.js', - }, - (entry) => { - /* ... */ - } - ); - - // Create another custom EntryHandler for the 'src/**/*.ts' files option, but without a `'all'` event handler. - const anotherCustomEntryHandler = scope.handleEntry({ - files: 'src/**/*.ts', - }); -} -``` - -And thus, if the previous code was used by a component with the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -``` - -Then the default `EntryHandler` instances would be created to handle all entries within the `web` directory. - -#### `scope.requestRestart()` - -Returns: `void` - -Request a Harper restart. This **does not** restart the instance immediately, but rather indicates to the user that a restart is required. This should be called when the plugin cannot handle the entry event and wants to indicate to the user that the Harper instance should be restarted. - -This method is called automatically by the `scope` instance if the user has not defined an `scope.options.on('change')` handler or if an event handler exists and is missing a necessary handler method. - -#### `scope.resources` - -Returns: `Map` - A map of the currently loaded [Resource](../globals#resource) instances. 
- -#### `scope.server` - -Returns: `server` - A reference to the [server](../globals#server) global API. - -#### `scope.options` - -Returns: [`OptionsWatcher`](#class-optionswatcher) - An instance of the `OptionsWatcher` class that provides access to the application's configuration options. Emits `'change'` events when the respective plugin part of the component's config file is modified. - -For example, if the plugin `customPlugin` is configured by an application with: - -```yaml -customPlugin: - files: 'foo.js' -``` - -And has the following `handleApplication(scope)` implementation: - -```typescript -export function handleApplication(scope) { - scope.options.on('change', (key, value, config) => { - if (key[0] === 'files') { - // Handle the change in the files option - scope.logger.info(`Files option changed to: ${value}`); - } - }); -} -``` - -Then modifying the `files` option in the `config.yaml` to `bar.js` would log the following: - -```plaintext -Files option changed to: bar.js -``` - -#### `scope.logger` - -Returns: `logger` - A scoped instance of the [`logger`](../globals#logger) class that provides logging capabilities for the plugin. - -It is recommended to use this instead of the `logger` global. - -#### `scope.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -#### `scope.directory` - -Returns: `string` - The directory of the application. This is the root directory of the component where the `config.yaml` file is located. - -### Interface: `FilesOption` - -- `string` | `string[]` | [`FilesOptionObject`](#interface-filesoptionobject) - -### Interface: `FilesOptionObject` - -- `source` - `string` | `string[]` - _required_ - The glob pattern string or array of strings. -- `ignore` - `string` | `string[]` - _optional_ - An array of glob patterns to exclude from matches. This is an alternative way to use negative patterns. Defaults to `[]`. 
- -### Interface: `FileAndURLPathConfig` - -- `files` - [`FilesOption`](#interface-filesoption) - _required_ - A glob pattern string, array of glob pattern strings, or a more expressive glob options object determining the set of files and directories to be resolved for the plugin. -- `urlPath` - `string` - _optional_ - A base URL path to prepend to the resolved `files` entries. - -### Class: `OptionsWatcher` - -- Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -#### Event: `'change'` - -- `key` - `string[]` - The key of the changed option split into parts (e.g. `foo.bar` becomes `['foo', 'bar']`). -- `value` - [`ConfigValue`](#interface-configvalue) - The new value of the option. -- `config` - [`ConfigValue`](#interface-configvalue) - The entire configuration object of the plugin. - -The `'change'` event is emitted whenever a configuration option is changed in the configuration file relative to the application and respective plugin. - -Given an application using the following `config.yaml`: - -```yaml -customPlugin: - files: 'web/**/*' -otherPlugin: - files: 'index.js' -``` - -The `scope.options` for the respective plugin's `customPlugin` and `otherPlugin` would emit `'change'` events when the `files` options relative to them are modified. - -For example, if the `files` option for `customPlugin` is changed to `web/**/*.js`, the following event would be emitted _only_ within the `customPlugin` scope: - -```js -scope.options.on('change', (key, value, config) => { - key; // ['files'] - value; // 'web/**/*.js' - config; // { files: 'web/**/*.js' } -}); -``` - -#### Event: `'close'` - -Emitted when the `OptionsWatcher` is closed via the `close()` method. The watcher is not usable after this event is emitted. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. 
- -#### Event: `'ready'` - -- `config` - [`ConfigValue`](#interface-configvalue) | `undefined` - The configuration object of the plugin, if present. - -This event can be emitted multiple times. It is first emitted upon the initial load, but will also be emitted after restoring a configuration file or configuration object after a `'remove'` event. - -#### Event: `'remove'` - -The configuration was removed. This can happen if the configuration file was deleted, the configuration object within the file is deleted, or if the configuration file fails to parse. Once restored, the `'ready'` event will be emitted again. - -#### `options.close()` - -Returns: `this` - The current `OptionsWatcher` instance. - -Closes the options watcher, removing all listeners and preventing any further events from being emitted. The watcher is not usable after this method is called. - -#### `options.get(key)` - -Parameters: - -- `key` - `string[]` - The key of the option to get, split into parts (e.g. `foo.bar` is represented as `['foo', 'bar']`). - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -If the config is defined it will attempt to retrieve the value of the option at the specified key. If the key does not exist, it will return `undefined`. - -#### `options.getAll()` - -Returns: [`ConfigValue`](#interface-configvalue) | `undefined` - -Returns the entire configuration object for the plugin. If the config is not defined, it will return `undefined`. - -#### `options.getRoot()` - -Returns: [`Config`](#interface-config) | `undefined` - -Returns the root configuration object of the application. This is the entire configuration object, basically the parsed form of the `config.yaml`. If the config is not defined, it will return `undefined`. - -#### Interface: `Config` - -- `[key: string]` [`ConfigValue`](#interface-configvalue) - -An object representing the `config.yaml` file configuration. 
- -#### Interface: `ConfigValue` - -- `string` | `number` | `boolean` | `null` | `undefined` | `ConfigValue[]` | [`Config`](#interface-config) - -Any valid configuration value type. Essentially, the primitive types, an array of those types, or an object comprised of values of those types. - -### Class: `EntryHandler` - -Extends: [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) - -Created by calling [`scope.handleEntry()`](#scopehandleentry) method. - -#### Event: `'all'` - -- `entry` - [`FileEntry`](#interface-fileentry) | [`DirectoryEntry`](#interface-directoryentry) - The entry that was added, changed, or removed. - -The `'all'` event is emitted for all entry events, including file and directory events. This is the event that the handler method in `scope.handleEntry` is registered for. The event handler receives an `entry` object that contains the entry metadata, such as the file contents, URL path, and absolute path. - -An effective pattern for this event is: - -```js -async function handleApplication(scope) { - scope.handleEntry((entry) => { - switch (entry.eventType) { - case 'add': - // Handle file addition - break; - case 'change': - // Handle file change - break; - case 'unlink': - // Handle file deletion - break; - case 'addDir': - // Handle directory addition - break; - case 'unlinkDir': - // Handle directory deletion - break; - } - }); -} -``` - -#### Event: `'add'` - -- `entry` - [`AddFileEvent`](#interface-addfileevent) - The file entry that was added. - -The `'add'` event is emitted when a file is created (or the watcher sees it for the first time). The event handler receives an `AddFileEvent` object that contains the file contents, URL path, absolute path, and other metadata. - -#### Event: `'addDir'` - -- `entry` - [`AddDirectoryEvent`](#interface-adddirectoryevent) - The directory entry that was added. - -The `'addDir'` event is emitted when a directory is created (or the watcher sees it for the first time). 
The event handler receives an `AddDirectoryEvent` object that contains the URL path and absolute path of the directory. - -#### Event: `'change'` - -- `entry` - [`ChangeFileEvent`](#interface-changefileevent) - The file entry that was changed. - -The `'change'` event is emitted when a file is modified. The event handler receives a `ChangeFileEvent` object that contains the updated file contents, URL path, absolute path, and other metadata. - -#### Event: `'close'` - -Emitted when the entry handler is closed via the [`entryHandler.close()`](#entryhandlerclose) method. - -#### Event: `'error'` - -- `error` - `unknown` - The error that occurred. - -#### Event: `'ready'` - -Emitted when the entry handler is ready to be used. This is not automatically awaited by the component loader, but also is not required. Calling `scope.handleEntry()` is perfectly sufficient. This is generally useful if you need to do something _after_ the entry handler is absolutely watching and handling entries. - -#### Event: `'unlink'` - -- `entry` - [`UnlinkFileEvent`](#interface-unlinkfileevent) - The file entry that was deleted. - -The `'unlink'` event is emitted when a file is deleted. The event handler receives an `UnlinkFileEvent` object that contains the URL path and absolute path of the deleted file. - -#### Event: `'unlinkDir'` - -- `entry` - [`UnlinkDirectoryEvent`](#interface-unlinkdirectoryevent) - The directory entry that was deleted. - -The `'unlinkDir'` event is emitted when a directory is deleted. The event handler receives an `UnlinkDirectoryEvent` object that contains the URL path and absolute path of the deleted directory. - -#### `entryHandler.name` - -Returns: `string` - The name of the plugin as configured in the `config.yaml` file. This is the key under which the plugin is configured. - -The name of the plugin. - -#### `entryHandler.directory` - -Returns: `string` - -The directory of the application. 
This is the root directory of the component where the `config.yaml` file is located. - -#### `entryHandler.close()` - -Returns: `this` - The current `EntryHandler` instance. - -Closes the entry handler, removing all listeners and preventing any further events from being emitted. The handler can be started again using the [`entryHandler.update()`](#entryhandlerupdateconfig) method. - -#### `entryHandler.update(config)` - -Parameters: - -- `config` - [`FilesOption`](#interface-filesoption) | [`FileAndURLPathConfig`](#interface-fileandurlpathconfig) - The configuration object for the entry handler. - -This method will update an existing entry handler to watch new entries. It will close the underlying watcher and create a new one, but will maintain any existing listeners on the EntryHandler instance itself. - -This method returns a promise associated with the ready event of the updated handler. - -#### Interface: `BaseEntry` - -- `stats` - [`fs.Stats`](https://nodejs.org/docs/latest/api/fs.html#class-fsstats) | `undefined` - The file system stats for the entry. -- `urlPath` - `string` - The recommended URL path of the entry. -- `absolutePath` - `string` - The absolute path of the entry. - -The foundational entry handle event object. The `stats` may or may not be present depending on the event, entry type, and platform. - -The `urlPath` is resolved based on the configured pattern (`files:` option) combined with the optional `urlPath` option. This path is generally useful for uniquely representing the entry. It is used in the built-in components such as `jsResource` and `static`. - -The `absolutePath` is the file system path for the entry. - -#### Interface: `FileEntry` - -Extends [`BaseEntry`](#interface-baseentry) - -- `contents` - `Buffer` - The contents of the file. - -A specific extension of the `BaseEntry` interface representing a file entry. We automatically read the contents of the file so the user doesn't have to bother with FS operations. 
- -There is no `DirectoryEntry` since there is no other important metadata aside from the `BaseEntry` properties. If a user wants the contents of a directory, they should adjust the pattern to resolve files instead. - -#### Interface: `EntryEvent` - -Extends [`BaseEntry`](#interface-baseentry) - -- `eventType` - `string` - The type of entry event. -- `entryType` - `string` - The type of entry, either a file or a directory. - -A general interface representing the entry handle event objects. - -#### Interface: `AddFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'add'` -- `entryType` - `'file'` - -Event object emitted when a file is created (or the watcher sees it for the first time). - -#### Interface: `ChangeFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'change'` -- `entryType` - `'file'` - -Event object emitted when a file is modified. - -#### Interface: `UnlinkFileEvent` - -Extends [`EntryEvent`](#interface-entryevent), [FileEntry](#interface-fileentry) - -- `eventType` - `'unlink'` -- `entryType` - `'file'` - -Event object emitted when a file is deleted. - -#### Interface: `FileEntryEvent` - -- `AddFileEvent` | `ChangeFileEvent` | `UnlinkFileEvent` - -A union type representing the file entry events. These events are emitted when a file is created, modified, or deleted. The `FileEntry` interface provides the file contents and other metadata. - -#### Interface: `AddDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'addDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is created (or the watcher sees it for the first time). - -#### Interface: `UnlinkDirectoryEvent` - -Extends [`EntryEvent`](#interface-entryevent) - -- `eventType` - `'unlinkDir'` -- `entryType` - `'directory'` - -Event object emitted when a directory is deleted. 
- -#### Interface: `DirectoryEntryEvent` - -- `AddDirectoryEvent` | `UnlinkDirectoryEvent` - -A union type representing the directory entry events. There are no change events for directories since they are not modified in the same way as files. - -#### Function: `onEntryEventHandler(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` - -Parameters: - -- `entryEvent` - [`FileEntryEvent`](#interface-fileentryevent) | [`DirectoryEntryEvent`](#interface-directoryentryevent) - -Returns: `void` - -This function is what is passed to the `scope.handleEntry()` method as the handler for the `'all'` event. This is also applicable to a custom `.on('all', handler)` method for any `EntryHandler` instance. diff --git a/versioned_docs/version-4.7/reference/content-types.md b/versioned_docs/version-4.7/reference/content-types.md deleted file mode 100644 index b7d223f4..00000000 --- a/versioned_docs/version-4.7/reference/content-types.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Content Types ---- - -# Content Types - -Harper supports several different content types (or MIME types) for both HTTP request bodies (describing operations) as well as for serializing content into HTTP response bodies. Harper follows HTTP standards for specifying both request body content types and acceptable response body content types. Any of these content types can be used with any of the standard Harper operations. - -:::tip Need a custom content type? - -Harper's extensible content type system lets you add support for any serialization format (XML, YAML, proprietary formats, etc.) by registering custom handlers in the [`contentTypes`](./globals.md#contenttypes) global Map. See the linked API reference for detailed implementation types, handler properties, and examples. - -::: - -For request body content, the content type should be specified with the `Content-Type` header. For example with JSON, use `Content-Type: application/json` and for CBOR, include `Content-Type: application/cbor`. 
To request that the response body be encoded with a specific content type, use the `Accept` header. If you want the response to be in JSON, use `Accept: application/json`. If you want the response to be in CBOR, use `Accept: application/cbor`. - -The following content types are supported: - -## JSON - application/json - -JSON is the most widely used content type, and is relatively readable and easy to work with. However, JSON does not support all the data types that are supported by Harper, and can't be used to natively encode data types like binary data or explicit Maps/Sets. Also, JSON is not as efficient as binary formats. When using JSON, compression is recommended (this also follows standard HTTP protocol with the `Accept-Encoding` header) to improve network transfer performance (although there is server performance overhead). JSON is a good choice for web development and when standard JSON types are sufficient and when combined with compression and debuggability/observability is important. - -## CBOR - application/cbor - -CBOR is a highly efficient binary format, and is a recommended format for most production use cases with Harper. CBOR supports the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. CBOR is very performant and space efficient even without compression. Compression will still yield better network transfer size/performance, but compressed CBOR is generally not any smaller than compressed JSON. CBOR also natively supports streaming for optimal performance (using indefinite length arrays). The CBOR format has excellent standardization and Harper's CBOR provides an excellent balance of performance and size efficiency. - -## MessagePack - application/x-msgpack - -MessagePack is another efficient binary format like CBOR, with support for all Harper data types. MessagePack generally has wider adoption than CBOR and can be useful in systems that don't have CBOR support (or good support). 
However, MessagePack does not have native support for streaming of arrays of data (for query results), and so query results are returned as a (concatenated) sequence of MessagePack objects/maps. MessagePack decoders used with Harper's MessagePack must be prepared to decode a direct sequence of MessagePack values to properly read responses. - -## Comma-separated Values (CSV) - text/csv - -Comma-separated values is an easy to use and understand format that can be readily imported into spreadsheets or used for data processing. CSV lacks hierarchical structure for most data types, and shouldn't be used for frequent/production use, but when you need it, it is available. - -In addition, with the REST interface, you can use file-style extensions to indicate an encoding like [https://host/path.csv](https://host/path.csv) to indicate CSV encoding. See the [REST documentation](../developers/rest) for more information on how to do this. diff --git a/versioned_docs/version-4.7/reference/data-types.md b/versioned_docs/version-4.7/reference/data-types.md deleted file mode 100644 index df03e718..00000000 --- a/versioned_docs/version-4.7/reference/data-types.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Data Types ---- - -# Data Types - -Harper supports a rich set of data types for use in records in databases. Various data types can be used from both direct JavaScript interfaces in Custom Functions and the HTTP operations APIs. Using JSON for communication naturally limits the data types to those available in JSON (Harper’s supports all of JSON data types), but JavaScript code and alternate data formats facilitate the use of additional data types. Harper supports MessagePack and CBOR, which allows for all of Harper supported data types. 
[Schema definitions can specify the expected types for fields, with GraphQL Schema Types](../developers/applications/defining-schemas), which are used for validation of incoming typed data (JSON, MessagePack), and are used for auto-conversion of untyped data (CSV, [query parameters](../developers/rest)). Available data types include: - -(Note that these labels are descriptive, they do not necessarily correspond to the GraphQL schema type names, but the schema type names are noted where possible) - -## Boolean - -true or false. The GraphQL schema type name is `Boolean`. - -## String - -Strings, or text, are a sequence of any Unicode characters and are internally encoded with UTF-8. The GraphQL schema type name is `String`. - -## Number - -Numbers can be stored as signed integers up to a 1000 bits of precision (about 300 digits) or floating point with 64-bit floating point precision, and numbers are automatically stored using the most optimal type. With JSON, numbers are automatically parsed and stored in the most appropriate format. Custom components and applications may use BigInt numbers to store/access integers that are larger than 53-bit. The following GraphQL schema type names are supported: - -- `Float` - Any number that can be represented with [64-bit double precision floating point number](https://en.wikipedia.org/wiki/Double-precision_floating-point_format) ("double") -- `Int` - Any integer from -2147483648 to 2147483647 -- `Long` - Any integer from -9007199254740992 to 9007199254740992 -- `BigInt` - Any integer (negative or positive) with less than 300 digits - -Note that `BigInt` is a distinct and separate type from standard numbers in JavaScript, so custom code should handle this type appropriately. - -## Object/Map - -Objects, or maps, that hold a set of named properties can be stored in Harper. When provided as JSON objects or JavaScript objects, all property keys are stored as strings. 
The order of properties is also preserved in Harper’s storage. Duplicate property keys are not allowed (they are dropped in parsing any incoming data). - -## Array - -Arrays hold an ordered sequence of values and can be stored in Harper. There is no support for sparse arrays, although you can use objects to store data with numbers (converted to strings) as properties. - -## Null - -A null value can be stored in Harper property values as well. - -## Date - -Dates can be stored as a specific data type. This is not supported in JSON, but is supported by MessagePack and CBOR. Custom Functions can also store and use Dates using JavaScript Date instances. The GraphQL schema type name is `Date`. - -## Binary Data - -Binary data can be stored in property values as well, with two different data types that are available: - -### Bytes - -JSON doesn’t have any support for encoding binary data, but MessagePack and CBOR support binary data in data structures, and this will be preserved in HarperDB. Custom Functions can also store binary data by using NodeJS’s Buffer or Uint8Array instances to hold the binary data. The GraphQL schema type name is `Bytes`. - -### Blobs - -Binary data can also be stored with [`Blob`s](blob), which can scale much better for larger content than `Bytes`, as it is designed to be streamed and does not need to be held entirely in memory. It is recommended that `Blob`s are used for content larger than 20KB. - -## Explicit Map/Set - -Explicit instances of JavaScript Maps and Sets can be stored and preserved in Harper as well. This can’t be represented with JSON, but can be with CBOR. 
diff --git a/versioned_docs/version-4.7/reference/dynamic-schema.md b/versioned_docs/version-4.7/reference/dynamic-schema.md deleted file mode 100644 index 97f5792d..00000000 --- a/versioned_docs/version-4.7/reference/dynamic-schema.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: Dynamic Schema ---- - -# Dynamic Schema - -When tables are created without any schema, through the operations API (without specifying attributes) or studio, the tables follow "dynamic-schema" behavior. Generally it is best-practice to define schemas for your tables to ensure predictable, consistent structures with data integrity and precise control over indexing, without dependency on data itself. However, it can often be simpler and quicker to simply create a table and let the data auto-generate the schema dynamically with everything being auto-indexed for broad querying. - -With dynamic schemas individual attributes are reflexively created as data is ingested, meaning the table will adapt to the structure of data ingested. Harper tracks the metadata around schemas, tables, and attributes allowing for describe table, describe schema, and describe all operations. - -### Databases - -Harper databases hold a collection of tables together in a single file that are transactionally connected. This means that operations across tables within a database can be performed in a single atomic transaction. By default tables are added to the default database called "data", but other databases can be created and specified for tables. - -### Tables - -Harper tables group records together with a common data pattern. To create a table users must provide a table name and a primary key. - -- **Table Name**: Used to identify the table. -- **Primary Key**: This is a required attribute that serves as the unique identifier for a record and is also known as the `hash_attribute` in Harper operations API. 
- -## Primary Key - -The primary key (also referred to as the `hash_attribute`) is used to uniquely identify records. Uniqueness is enforced on the primary key; inserts with the same primary key will be rejected. If a primary key is not provided on insert, a GUID will be automatically generated and returned to the user. The [Harper Storage Algorithm](storage-algorithm) utilizes this value for indexing. - -**Standard Attributes** - -With tables that are using dynamic schemas, additional attributes are reflexively added via insert and update operations (in both SQL and NoSQL) when new attributes are included in the data structure provided to Harper. As a result, schemas are additive, meaning new attributes are created in the underlying storage algorithm as additional data structures are provided. Harper offers `create_attribute` and `drop_attribute` operations for users who prefer to manually define their data model independent of data ingestion. When new attributes are added to tables with existing data, the value of that new attribute will be assumed `null` for all existing records. - -**Audit Attributes** - -Harper automatically creates two audit attributes used on each record if the table is created without a schema. - -- `__createdtime__`: The time the record was created in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. -- `__updatedtime__`: The time the record was updated in [Unix Epoch with milliseconds](https://www.epochconverter.com/) format. - -### Dynamic Schema Example - -To better understand the behavior let’s take a look at an example. This example utilizes [Harper API operations](../developers/operations-api/databases-and-tables). - -**Create a Database** - -```bash -{ - "operation": "create_database", - "schema": "dev" -} -``` - -**Create a Table** - -Notice the schema name, table name, and primary key name are the only required parameters. 
- -```bash -{ - "operation": "create_table", - "database": "dev", - "table": "dog", - "primary_key": "id" -} -``` - -At this point the table does not have structure beyond what we provided, so the table looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_2_create_table.png.webp) - -**Insert Record** - -To define attributes we do not need to do anything beyond sending them in with an insert operation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 1, "dog_name": "Penny", "owner_name": "Kyle"} - ] -} -``` - -With a single record inserted and new attributes defined, our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_3_insert_record.png.webp) - -Indexes have been automatically created for `dog_name` and `owner_name` attributes. - -**Insert Additional Record** - -If we continue inserting records with the same data schema no schema updates are required. One record will omit the hash attribute from the insert to demonstrate GUID generation. - -```bash -{ - "operation": "insert", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "dog_name": "Monk", "owner_name": "Aron"}, - {"dog_name": "Harper","owner_name": "Stephen"} - ] -} -``` - -In this case, there is no change to the schema. Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_4_insert_additional_record.png.webp) - -**Update Existing Record** - -In this case, we will update a record with a new attribute not previously defined on the table. - -```bash -{ - "operation": "update", - "database": "dev", - "table": "dog", - "records": [ - {"id": 2, "weight_lbs": 35} - ] -} -``` - -Now we have a new attribute called `weight_lbs`. 
Our table now looks like this: - -**dev.dog** - -![](/img/v4.6/reference/dynamic_schema_5_update_existing_record.png.webp) - -**Query Table with SQL** - -Now if we query for all records where `weight_lbs` is `null` we expect to get back two records. - -```bash -{ - "operation": "sql", - "sql": "SELECT * FROM dev.dog WHERE weight_lbs IS NULL" -} -``` - -This results in the expected two records being returned. - -![](/img/v4.6/reference/dynamic_schema_6_query_table_with_sql.png.webp) diff --git a/versioned_docs/version-4.7/reference/globals.md b/versioned_docs/version-4.7/reference/globals.md deleted file mode 100644 index 4316d897..00000000 --- a/versioned_docs/version-4.7/reference/globals.md +++ /dev/null @@ -1,422 +0,0 @@ ---- -title: Globals ---- - -# Globals - -The primary way that JavaScript code can interact with Harper is through the global variables, which has several objects and classes that provide access to the tables, server hooks, and resources that Harper provides for building applications. As global variables, these can be directly accessed in any module. - -These global variables are also available through the `harperdb` module/package, which can provide better typing in TypeScript. To use this with your own directory, make sure you link the package to your current `harperdb` installation: - -```bash -npm link harperdb -``` - -The `harperdb` package is automatically linked for all installed components. Once linked, if you are using EcmaScript module syntax you can import function from `harperdb` like: - -```javascript -import { tables, Resource } from 'harperdb'; -``` - -Or if you are using CommonJS format for your modules: - -```javascript -const { tables, Resource } = require('harperdb'); -``` - -The global variables include: - -## `tables` - -This is an object with all the tables in the default database (the default database is "data"). 
Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! - -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... -} -``` - -## `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. 
- -### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -## `Resource` - -This is the base class for all resources, including tables and external data sources. This is provided so that you can extend it to implement custom data source providers. See the [Resource API documentation](resources/) for more details about implementing a Resource class. - -## `auth(username, password?): Promise` - -This returns the user object with permissions/authorization information based on the provided username. If a password is provided, the password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -## `logger` - -This provides methods `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` for logging. See the [logging documentation](../administration/logging/standard-logging) for more information. - -## `server` - -The `server` global object provides a number of functions and objects to interact with Harper's HTTP, networking, and authentication services. - -### `server.http(listener: RequestListener, options: HttpOptions): HttpServer[]` - -Alias: `server.request` - -Add a handler method to the HTTP server request listener middleware chain. - -Returns an array of server instances based on the specified `options.port` and `options.securePort`. - -Example: - -```js -server.http( - (request, next) => { - return request.url === '/graphql' ? 
handleGraphQLRequest(request) : next(request); - }, - { - runFirst: true, // run this handler first - } -); -``` - -#### `RequestListener` - -Type: `(request: Request, next: RequestListener) => Promise` - -The HTTP request listener to be added to the middleware chain. To continue chain execution pass the `request` to the `next` function such as `return next(request);`. - -### `Request` and `Response` - -The `Request` and `Response` classes are based on the WHATWG APIs for the [`Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) and [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) classes. Requests and responses are based on these standard-based APIs to facilitate reuse with modern web code. While Node.js' HTTP APIs are powerful low-level APIs, the `Request`/`Response` APIs provide excellent composability characteristics, well suited for layered middleware and for clean mapping to [RESTful method handlers](./resources/) with promise-based responses, as well as interoperability with other standards-based APIs like [streams](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) used with [`Blob`s](https://developer.mozilla.org/en-US/docs/Web/API/Blob). However, the Harper implementation of these classes is not a direct implementation of the WHATWG APIs, but implements additional/distinct properties for the the Harper server environment: - -#### `Request` - -A `Request` object is passed to the direct static REST handlers, and preserved as the context for instance methods, and has the following properties: - -- `url` - This is the request target, which is the portion of the URL that was received by the server. If a client sends a request to `https://example.com:8080/path?query=string`, the actual received request is `GET /path?query=string` and the `url` property will be `/path?query=string`. -- `method` - This is the HTTP method of the request. This is a string like `GET`, `POST`, `PUT`, `DELETE`, etc. 
-- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the request. -- `pathname` - This is the path portion of the URL, without the query string. For example, if the URL is `/path?query=string`, the `pathname` will be `/path`. -- `protocol` - This is the protocol of the request, like `http` or `https`. -- `data` - This is the deserialized body of the request (based on the type of data specified by `Content-Type` header). -- `ip` - This is the remote IP address of the client that made the request (or the remote IP address of the last proxy to connect to Harper). -- `host` - This is the host of the request, like `example.com`. -- `sendEarlyHints(link: string, headers?: object): void` - This method sends an early hints response to the client, prior to actually returning a response. This is useful for sending a link header to the client to indicate that another resource should be preloaded. The `headers` argument can be used to send additional headers with the early hints response, in addition to the `link`. This is generally most helpful in a cache resolution function, where you can send hints _if_ the data is not in the cache and is resolving from an origin: - -```javascript -class Origin { - async get(request) { - // if we are fetching data from origin, send early hints - this.getContext().requestContext.sendEarlyHints(''); - let response = await fetch(request); - ... - } -} -Cache.sourcedFrom(Origin); -``` - -- `login(username, password): Promise` - This method can be called to start an authenticated session. The login will authenticate the user by username and password. If the authentication was successful, a session will be created and a cookie will be set on the response header that references the session. All subsequent requests from the client that sends the cookie in requests will be authenticated as the user that logged in and the session record will be attached to the request. 
This method returns a promise that resolves when the login is successful, and rejects if the login is unsuccessful. -- `session` - This is the session object that is associated with current cookie-maintained session. This object is used to store session data for the current session. This is `Table` record instance, and can be updated by calling `request.session.update({ key: value })` or session can be retrieved with `request.session.get()`. If the cookie has not been set yet, a cookie will be set the first time a session is updated or a login occurs. -- `_nodeRequest` - This is the underlying Node.js [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) object. This can be used to access the raw request data, such as the raw headers, raw body, etc. However, this is discouraged and should be used with caution since it will likely break any other server handlers that depends on the layered `Request` call with `Response` return pattern. -- `_nodeResponse` - This is the underlying Node.js [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) object. This can be used to access the raw response data, such as the raw headers. Again, this is discouraged and can cause problems for middleware, should only be used if you are certain that other server handlers will not attempt to return a different `Response` object. - -#### `Response` - -REST methods can directly return data that is serialized and returned to users, or it can return a `Response` object (or a promise to a `Response`), or it can return a `Response`-like object with the following properties (or again, a promise to it): - -- `status` - This is the HTTP status code of the response. This is a number like `200`, `404`, `500`, etc. -- `headers` - This is a [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) object that contains the headers of the response. -- `data` - This is the data to be returned of the response. 
This will be serialized using Harper's content negotiation. -- `body` - Alternately (to `data`), the raw body can be returned as a `Buffer`, string, stream (Node.js or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream)), or a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). - -#### `HttpOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTPS server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -#### `HttpServer` - -Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. - -### `server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer` - -Creates a socket server on the specified `options.port` or `options.securePort`. - -Only one socket server will be created. A `securePort` takes precedence. - -#### `ConnectionListener` - -Node.js socket server connection listener as documented in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener) - -#### `SocketOptions` - -- `port` - _optional_ - `number` - Specify the port for the [`net.Server`](https://nodejs.org/api/net.html#class-netserver) instance. -- `securePort` - _optional_ - `number` - Specify the port for the [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. 
- -#### `SocketServer` - -Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. - -### `server.ws(listener: WsListener, options: WsOptions): HttpServer[]` - -Add a listener to the WebSocket connection listener middleware chain. The WebSocket server is associated with the HTTP server specified by the `options.port` or `options.securePort`. Use the [`server.upgrade()`](globals#serverupgradelistener-upgradelistener-options-upgradeoptions-void) method to add a listener to the upgrade middleware chain. - -Example: - -```js -server.ws((ws, request, chainCompletion) => { - chainCompletion.then(() => { - ws.on('error', console.error); - - ws.on('message', function message(data) { - console.log('received: %s', data); - }); - - ws.send('something'); - }); -}); -``` - -#### `WsListener` - -Type: `(ws: WebSocket, request: Request, chainCompletion: ChainCompletion, next: WsListener): Promise` - -The WebSocket connection listener. - -- The `ws` argument is the [WebSocket](https://github.com/websockets/ws/blob/master/doc/ws.md#class-websocket) instance as defined by the `ws` module. -- The `request` argument is Harper's transformation of the `IncomingMessage` argument of the standard ['connection'](https://github.com/websockets/ws/blob/master/doc/ws.md#event-connection) listener event for a WebSocket server. -- The `chainCompletion` argument is a `Promise` of the associated HTTP server's request chain. Awaiting this promise enables the user to ensure the HTTP request has finished being processed before operating on the WebSocket. -- The `next` argument is similar to that of other `next` arguments in Harper's server middlewares. 
To continue execution of the WebSocket connection listener middleware chain, pass all of the other arguments to this one such as: `next(ws, request, chainCompletion)` - -#### `WsOptions` - -Type: `Object` - -Properties: - -- `maxPayload` - _optional_ - `number` - Set the max payload size for the WebSocket server. Defaults to 100 MB. -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which WebSocket server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which WebSocket secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.upgrade(listener: UpgradeListener, options: UpgradeOptions): void` - -Add a listener to the HTTP Server [upgrade](https://nodejs.org/api/http.html#event-upgrade_1) event. If a WebSocket connection listener is added using [`server.ws()`](globals#serverwslistener-wslistener-options-wsoptions-httpserver), a default upgrade handler will be added as well. The default upgrade handler will add a `__harperdb_request_upgraded` boolean to the `request` argument to signal the connection has already been upgraded. It will also check for this boolean _before_ upgrading and if it is `true`, it will pass the arguments along to the `next` listener. - -This method should be used to delegate HTTP upgrade events to an external WebSocket server instance. - -Example: - -> This example is from the Harper Next.js component. 
See the complete source code [here](https://github.com/HarperDB/nextjs/blob/main/extension.js) - -```js -server.upgrade( - (request, socket, head, next) => { - if (request.url === '/_next/webpack-hmr') { - return upgradeHandler(request, socket, head).then(() => { - request.__harperdb_request_upgraded = true; - - next(request, socket, head); - }); - } - - return next(request, socket, head); - }, - { runFirst: true } -); -``` - -#### `UpgradeListener` - -Type: `(request, socket, head, next) => void` - -The arguments are passed to the middleware chain from the HTTP server [`'upgrade'`](https://nodejs.org/api/http.html#event-upgrade_1) event. - -#### `UpgradeOptions` - -Type: `Object` - -Properties: - -- `runFirst` - _optional_ - `boolean` - Add listener to the front of the middleware chain. Defaults to `false` -- `port` - _optional_ - `number` - Specify which HTTP server middleware chain to add the listener to. Defaults to the Harper system default HTTP port configured by `harperdb-config.yaml`, generally `9926` -- `securePort` - _optional_ - `number` - Specify which HTTP secure server middleware chain to add the listener to. Defaults to the Harper system default HTTP secure port configured by `harperdb-config.yaml`, generally `9927` - -### `server.config` - -This provides access to the Harper configuration object. This comes from the [harperdb-config.yaml](../deployments/configuration) (parsed into object form). - -### `server.recordAnalytics(value, metric, path?, method?, type?)` - -This records the provided value as a metric into Harper's analytics. Harper efficiently records and tracks these metrics and makes them available through [analytics API](analytics). The values are aggregated and statistical information is computed when many operations are performed. The optional parameters can be used to group statistics. For the parameters, make sure you are not grouping on too fine of a level for useful aggregation. 
The parameters are: - -- `value` - This is a numeric value for the metric that is being recorded. This can be a value measuring time or bytes, for example. -- `metric` - This is the name of the metric. -- `path` - This is an optional path (like a URL path). For a URL like /my-resource/, you would typically include a path of "my-resource", not including the id so you can group by all the requests to "my-resource" instead of individually aggregating by each individual id. -- `method` - Optional method to group by. -- `type` - Optional type to group by. - -### `server.getUser(username): Promise` - -This returns the user object with permissions/authorization information based on the provided username. This does not verify the password, so it is generally used for looking up users by username. If you want to verify a user by password, use [`server.authenticateUser`](globals#serverauthenticateuserusername-password-user). - -### `server.authenticateUser(username, password): Promise` - -This returns the user object with permissions/authorization information based on the provided username. The password will be verified before returning the user object (if the password is incorrect, an error will be thrown). - -### `server.resources: Resources` - -This provides access to the map of all registered resources. This is the central registry in Harper for registering any resources to be exported for use by REST, MQTT, or other components. Components that want to register resources should use the `server.resources.set(name, resource)` method to add to this map. Exported resources can be found by passing in a path to `server.resources.getMatch(path)` which will find any resource that matches the path or beginning of the path. - -#### `server.resources.set(name, resource, exportTypes?)` - -Register a resource with the server. 
For example: - -``` -class NewResource extends Resource { -} -server.resources.set('NewResource', Resource); -/ or limit usage: -server.resources.set('NewResource', Resource, { rest: true, mqtt: false, 'my-protocol': true }); -``` - -#### `server.resources.getMatch(path, exportType?)` - -Find a resource that matches the path. For example: - -``` -server.resources.getMatch('/NewResource/some-id'); -/ or specify the export/protocol type, to allow it to be limited: -server.resources.getMatch('/NewResource/some-id', 'my-protocol'); -``` - -### `server.operation(operation: Object, context?: Object, authorize?: boolean)` - -Execute an operation from the [Operations API](../developers/operations-api) - -Parameters: - -- `operation` - `Object` - Object matching desired operation's request body -- `context` - `Object` - `{ username: string}` - _optional_ - The specified user -- `authorize` - `boolean` - _optional_ - Indicate the operation should authorize the user or not. Defaults to `false` - -Returns a `Promise` with the operation's response as per the [Operations API documentation](../developers/operations-api). - -### `server.nodes` - -Returns an array of node objects registered in the cluster - -### `server.shards` - -Returns map of shard number to an array of its associated nodes - -### `server.hostname` - -Returns the hostname of the current node - -### `server.contentTypes` - -Returns the `Map` of registered content type handlers. Same as the [`contentTypes`](./globals#contenttypes) global. - -## `contentTypes` - -Returns a [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for request/response serialization. - -HarperDB uses content negotiation to automatically handle data serialization and deserialization for HTTP requests and other protocols. This process works by: - -1. 
**Request Processing**: Comparing the `Content-Type` header with registered handlers to deserialize incoming data into structured formats for processing and storage -2. **Response Generation**: Comparing the `Accept` header with registered handlers to serialize structured data into the appropriate response format - -### Built-in Content Types - -HarperDB includes handlers for common formats: - -- **JSON** (`application/json`) -- **CBOR** (`application/cbor`) -- **MessagePack** (`application/msgpack`) -- **CSV** (`text/csv`) -- **Event-Stream** (`text/event-stream`) -- And more... - -### Custom Content Type Handlers - -You can extend or replace content type handlers by modifying the `contentTypes` map from the `server` global (or `harperdb` export). The map is keyed by MIME type, with values being handler objects containing these optional properties: - -#### Handler Properties - -- **`serialize(data: any): Buffer | Uint8Array | string`** - Called to convert data structures into the target format for responses. Should return binary data (Buffer/Uint8Array) or a string. - -- **`serializeStream(data: any): ReadableStream`** - Called to convert data structures into streaming format. Useful for handling asynchronous iterables or large datasets. - -- **`deserialize(buffer: Buffer | string): any`** - Called to convert incoming request data into structured format. Receives a string for text MIME types (`text/*`) and a Buffer for binary types. Only used if `deserializeStream` is not defined. - -- **`deserializeStream(stream: ReadableStream): any`** - Called to convert incoming request streams into structured format. Returns deserialized data (potentially as an asynchronous iterable). - -- **`q: number`** _(default: 1)_ - Quality indicator between 0 and 1 representing serialization fidelity. Used in content negotiation to select the best format when multiple options are available. 
The server chooses the content type with the highest product of client quality × server quality values. - -For example, if you wanted to define an XML serializer (that can respond with XML to requests with `Accept: text/xml`) you could write: - -```javascript -contentTypes.set('text/xml', { - serialize(data) { - return '' ... some serialization ''; - }, - q: 0.8, -}); -``` diff --git a/versioned_docs/version-4.7/reference/graphql.md b/versioned_docs/version-4.7/reference/graphql.md deleted file mode 100644 index cc43eec9..00000000 --- a/versioned_docs/version-4.7/reference/graphql.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: GraphQL Querying ---- - -# GraphQL Querying - -Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../developers/applications/defining-schemas), and for querying [Resources](./resources/). - -Get started by setting `graphql: true` in `config.yaml`. - -This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. - -> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it does not fully implement neither that specification nor the [GraphQL](https://spec.graphql.org/) specification. - -Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. 
- -For example, to request the GraphQL Query: - -```graphql -query GetDogs { - Dog { - id - name - } -} -``` - -The `GET` request would look like: - -```http -GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D+%7D -Accept: application/graphql-response+json -``` - -And the `POST` request would look like: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDogs { Dog { id name } } }" -} -``` - -> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. - -The Harper GraphQL querying system is strictly limited to exported Harper Resources. For many users, this will typically be a table that uses the `@exported` directive in its schema. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](./resources/#query) for more complex queries. - -Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: - -```graphql -query GetDogsAndOwners { - Dog { - id - name - breed - } - - Owner { - id - name - occupation - } -} -``` - -This will return all dogs and owners in the database. And is equivalent to executing two REST queries: - -```http -GET /Dog/?select(id,name,breed) -# and -GET /Owner/?select(id,name,occupation) -``` - -### Request Parameters - -There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` - -1. `query` - _Required_ - The string representation of the GraphQL document. - 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. - 1. i.e. 
GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). - 1. If an shorthand, unnamed, or singular named query is provided, they will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. -1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter -1. `variables` - _Optional_ - A map of variable values to be used for the specified query - -### Type Checking - -The Harper GraphQL Querying system takes many liberties from the GraphQL specification. This extends to how it handle type checking. In general, the querying system does **not** type check. Harper uses the `graphql` parser directly, and then performs a transformation on the resulting AST. We do not control any type checking/casting behavior of the parser, and since the execution step diverges from the spec greatly, the type checking behavior is only loosely defined. - -In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. - -For example, the variable `$name: String!` states that `name` should be a non-null, string value. - -- If the request does not contain the `name` variable, an error will be returned -- If the request provides `null` for the `name` variable, an error will be returned -- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. -- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. - - If `null` is provided as the variable value, an error will still be returned. - - If the default value does not match the type specified (i.e. `$name: String! 
= 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. -- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. - -The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. - -- Objects will be transformed into properly nested attributes -- Strings and Boolean values are passed through as their AST values -- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. -- List and Enums are not supported. - -### Fragments - -The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. - -For example, in the query - -```graphql -query Get { - Dog { - ...DogFields - } -} - -fragment DogFields on Dog { - name - breed -} -``` - -The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). - -You can literally specify anything in the fragment and it will behave the same way: - -```graphql -fragment DogFields on Any { ... } # this is recommended -fragment DogFields on Cat { ... } -fragment DogFields on Animal { ... } -fragment DogFields on LiterallyAnything { ... 
} -``` - -As an actual example, fragments should be used for composition: - -```graphql -query Get { - Dog { - ...sharedFields - breed - } - Owner { - ...sharedFields - occupation - } -} - -fragment sharedFields on Any { - id - name -} -``` - -### Short Form Querying - -Any attribute can be used as an argument for a query. In this short form, multiple arguments is treated as multiple equivalency conditions with the default `and` operation. - -For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. - -```graphql -query GetDog($id: ID!) { - Dog(id: $id) { - name - breed - owner { - name - } - } -} -``` - -And as a properly formed request: - -```http -POST /graphql/ -Content-Type: application/json -Accept: application/graphql-response+json - -{ - "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}", - "variables": { - "id": "0" - } -} -``` - -The REST equivalent would be: - -```http -GET /Dog/?id==0&select(name,breed,owner{name}) -# or -GET /Dog/0?select(name,breed,owner{name}) -``` - -Short form queries can handle nested attributes as well. - -For example, return all dogs who have an owner with the name `"John"` - -```graphql -query GetDog { - Dog(owner: { name: "John" }) { - name - breed - owner { - name - } - } -} -``` - -Would be equivalent to - -```http -GET /Dog/?owner.name==John&select(name,breed,owner{name}) -``` - -And finally, we can put all of these together to create semi-complex, equality based queries! - -The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. - -```graphql -query GetDog($dogName: String!, $ownerName: String!) { - Dog(name: $dogName, owner: { name: $ownerName }) { - name - breed - owner { - name - } - } -} -``` - -### Long Form Querying - -> Coming soon! - -### Mutations - -> Coming soon! - -### Subscriptions - -> Coming soon! - -### Directives - -> Coming soon! 
diff --git a/versioned_docs/version-4.7/reference/headers.md b/versioned_docs/version-4.7/reference/headers.md deleted file mode 100644 index 5c85fc88..00000000 --- a/versioned_docs/version-4.7/reference/headers.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Harper Headers ---- - -# Harper Headers - -All Harper API responses include headers that are important for interoperability and debugging purposes. The following headers are returned with all Harper API responses: - -| Key | Example Value | Description | -| ------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| server-timing | db;dur=7.165 | This reports the duration of the operation, in milliseconds. This follows the standard for Server-Timing and can be consumed by network monitoring tools. | -| content-type | application/json | This reports the MIME type of the returned content, which is negotiated based on the requested content type in the Accept header. | diff --git a/versioned_docs/version-4.7/reference/index.md b/versioned_docs/version-4.7/reference/index.md deleted file mode 100644 index 4c5d867a..00000000 --- a/versioned_docs/version-4.7/reference/index.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Reference ---- - -# Reference - -This section contains technical details and reference materials for Harper. - -Please choose a topic from the navigation menu on the left. diff --git a/versioned_docs/version-4.7/reference/limits.md b/versioned_docs/version-4.7/reference/limits.md deleted file mode 100644 index 97214620..00000000 --- a/versioned_docs/version-4.7/reference/limits.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Harper Limits ---- - -# Harper Limits - -This document outlines limitations of Harper. - -## Database Naming Restrictions - -**Case Sensitivity** - -Harper database metadata (database names, table names, and attribute/column names) are case sensitive. 
Meaning databases, tables, and attributes can differ only by the case of their characters. - -**Restrictions on Database Metadata Names** - -Harper database metadata (database names, table names, and attribute names) cannot contain the following UTF-8 characters: - -``` -/`¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ -``` - -Additionally, they cannot contain the first 31 non-printing characters. Spaces are allowed, but not recommended as best practice. The regular expression used to verify a name is valid is: - -``` -^[\x20-\x2E|\x30-\x5F|\x61-\x7E]*$ -``` - -## Table Limitations - -**Attribute Maximum** - -Harper limits the number of total indexed attributes across tables (including the primary key of each table) to 10,000 per database. - -## Primary Keys - -The maximum length of a primary key is 1978 bytes or 659 characters (whichever is shortest). diff --git a/versioned_docs/version-4.7/reference/resources/index.md b/versioned_docs/version-4.7/reference/resources/index.md deleted file mode 100644 index 82269149..00000000 --- a/versioned_docs/version-4.7/reference/resources/index.md +++ /dev/null @@ -1,796 +0,0 @@ ---- -title: Resource Class ---- - -# Resource Class - -## Resource Class - -The Resource class is designed to provide a unified API for modeling different data resources within Harper. Database/table data can be accessed through the Resource API. The Resource class can be extended to create new data sources. Resources can be exported to define endpoints. Tables themselves extend the Resource class, and can be extended by users. - -Conceptually, a Resource class provides an interface for accessing, querying, modifying, and monitoring a set of entities or records. Instances of a Resource class can represent a single record or entity, or a collection of records, at a given point in time, that you can interact with through various methods or queries. 
Resource instances can represent an atomic transactional view of a resource and facilitate transactional interaction. A Resource instance holds the primary key/identifier, context information, and any pending updates to the record, so any instance methods can act on the record and have full access to this information during execution. Therefore, there are distinct resource instances created for every record or query that is accessed, and the instance methods are used for interaction with the data. - -Resource classes also have static methods, which are generally the preferred way to externally interact with tables and resources. The static methods handle parsing paths and query strings, starting a transaction as necessary, performing access authorization checks (if required), creating a resource instance, and calling the instance methods. This general rule for how to interact with resources: - -- If you want to _act upon_ a table or resource, querying or writing to it, then use the static methods to initially access or write data. For example, you could use `MyTable.get(34)` to access the record with a primary key of `34`. -- If you want to _define custom behavior_ for a table or resource (to control how a resource responds to queries/writes), then extend the class and override/define instance methods. - -The Resource API is heavily influenced by the REST/HTTP API, and the methods and properties of the Resource class are designed to map to and be used in a similar way to how you would interact with a RESTful API. - -The REST-based API is a little different from traditional Create-Read-Update-Delete (CRUD) APIs that were designed with single-server interactions in mind. Semantics that attempt to guarantee no existing record or overwrite-only behavior require locks that don't scale well in distributed database. Centralizing writes around `put` calls provides much more scalable, simple, and consistent behavior in a distributed eventually consistent database. 
You can generally think of CRUD operations mapping to REST operations like this: - -- Read - `get` -- Create with a known primary key - `put` -- Create with a generated primary key - `post`/`create` -- Update (Full) - `put` -- Update (Partial) - `patch` -- Delete - `delete` - -The RESTful HTTP server and other server interfaces will directly call resource methods of the same name to fulfill incoming requests so resources can be defined as endpoints for external interaction. When resources are used by the server interfaces, the static method will be executed (which starts a transaction and does access checks), which will then create the resource instance and call the corresponding instance method. Paths (URL, MQTT topics) are mapped to different resource instances. Using a path that specifies an ID like `/MyResource/3492` will be mapped an instance of MyResource, and will call the instance methods like `get(target)`, `put(target, data)`, and `post(target, data)`, where target is based on the `/3492` part of the path. - -It is recommended that you use the latest version (V2) of the Resource API with the legacy instance binding behavior disabled. This is done by setting the static `loadAsInstance` property to `false` on the Resource class. This will become the default behavior in Harper version 5.0. This page is written assuming `loadAsInstance` is set to `false`. If you want to use the legacy instance binding behavior, you can set `loadAsInstance` to `true` on the Resource class. If you have existing code that you want to migrate, please see the [migration guide](resources/migration) for more information. - -You can create classes that extend `Resource` to define your own data sources, typically to interface with external data sources (the `Resource` base class is available as a global variable in the Harper JS environment). In doing this, you will generally be extending and providing implementations for the instance methods below. 
For example: - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = false; // enable the updated API - async get(target) { - // fetch data from an external source, using our id - let response = await this.fetch(target.id); - // do something with the response - } - put(target, data) { - // send the data into the external source - } - delete(target) { - // delete an entity in the external data source - } - subscribe(subscription) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - static loadAsInstance = false; // enable the updated API - get(target) { - // we can add properties or change properties before returning data: - return { ...super.get(target), newProperty: 'newValue', existingProperty: 42 }; // returns the record, with additional properties - } - put(target, data) { - // can change data any way we want - super.put(target, data); - } - delete(target) { - super.delete(target); - } - post(target, data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created in your `schema.graphql` file will be available as a property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -**Schema Definition:** -Tables are defined in your `schema.graphql` file using the `@table` directive. For example: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - price: Float -} -``` - -Once declared, `Product` will be available as `tables.Product` (or `databases.data.Product`). This mapping is automatic: every table defined in the default database in your schema will appear as a property on the `tables` object. For more info, read our complete [guide on defining schemas](../developers/applications/defining-schemas). - -#### Example - -```js -const Product = tables.Product; // Same as databases.data.Product - -// Create a new record (`id` is automatically generated when using `.create()`) -const created = await Product.create({ name: 'Shirt', price: 9.5 }); - -// Modify the record -await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); // 20% off! - -// Retrieve by primary key -const record = await Product.get(created.id); - -logger.info('New price:', record.price); - -// Query for all products with a `price` less than `8.00` -const query = { - conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], -}; - -for await (const record of Product.search(query)) { - // ... 
-} -``` - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created in your `schema.graphql` file will be available as a property on this object. The property values are objects containing the tables in that database, where each property is a table, just like the `tables` object. In fact, `databases.data === tables` should always be true. - -#### Example - -```js -const Product = databases.data.Product; // Default database -const Events = databases.analytics.Events; // Another database - -// Create a new event record -const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); - -// Query events -for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { - // Handle each event -} -``` - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](./components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the [transactions documentation](./transactions) for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. - -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. 
This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(target: RequestTarget | Id): Promise|AsyncIterable` - -This retrieves a record, or queries for records, and is called by HTTP GET requests. This can be called with a `RequestTarget` which can specify a path/id and query parameters as well as search parameters. For tables, this can also be called directly with an id (string or number) to retrieve a record by id. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. HTTP requests will always call `get` with a full `RequestTarget`. The default `get` method (`super.get(target)`) returns the current record as a plain object. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -class extends Resource { - static loadAsInstance = false; - get(target) { - let param1 = target.get('param1'); // returns 'value' - let id = target.id; // returns 'some-id' - let path = target.pathname; // returns /some-id - let fullTarget = target.target; // returns /some-id?param1=value - ... 
- } -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return the record identified by the path. If `get` is called on a collection (`/Table/?name=value`), the target will have the `isCollection` property set to `true` and default action is to `search` and return an AsyncIterable of results. - -### `search(query: RequestTarget)`: AsyncIterable - -This performs a query on this resource or table. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an `AsyncIterable` of results. The `query` object can be used to specify the desired query. - -### `put(target: RequestTarget | Id, data: object): void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(target, data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. - -The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `patch(target: RequestTarget | Id, data: object): void|Response` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(target, data)`) updates the record. 
The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `update(target: RequestTarget, updates?: object): Updatable` - -This can be called to get an Updatable class for updating a record. An `Updatable` instance provides direct access to record properties as properties on `Updatable` instance. The properties can also be modified and any changes are tracked and written to the record when the transaction commits. For example, if we wanted to update the quantify of a product in the Product table, in response to a post, we could write: - -```javascript -class ... { - post(target, data) { - static loadAsInstance = false; - let updatable = this.update(target); - updatable.quantity = updatable.quantity - 1; - } -} -``` - -In addition, the `Updatable` class has the following methods. - -### `Updatable` class - -#### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. We could improve the example above to reliably ensure the quantity is decremented even when it occurs in multiple nodes simultaneously: - -```javascript -class ... { - static loadAsInstance = false; - post(target, data) { - let updatable = this.update(target); - updatable.addTo('quantity', -1); - } -} -``` - -#### `subtractFrom(property, value)` - -This functions exactly the same as `addTo`, except it subtracts the value. 
- -The `Updatable` also inherits the `getUpdatedTime` and `getExpiresAt` methods from the `RecordObject` class. - -### `delete(target: RequestTarget): void|Response` - -This will delete this record or resource identified by the target, and is called for HTTP DELETE requests. You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete(target)`) deletes the record identified by target from the table as part of the current transaction. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `publish(target: RequestTarget, message): void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(target, message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `post(target: RequestTarget, data: object): void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `target` object represents the target of a request and can be used to access the path, coerced id, and any query parameters that were included in the URL. - -### `invalidate(target: RequestTarget)` - -This method is available on tables. 
This will invalidate the specified record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.publish(message)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can use a `for await` to iterate through. It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. 
-- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations. -- `transaction` - The current transaction If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. 
This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`). - -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used fulfilled many different requests, and relying on this first request context may not be representative of future requests. 
Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently. Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. 
Whereas instance methods are bound to a specific record, the static methods allow you to specify any record in the table to act on.
- -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(target: RequestTarget|Id, data: object, context?: Resource|Context): Promise|any` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(target: RequestTarget|Id, recordUpdate: object, context?: Resource|Context): Promise|void` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(target: RequestTarget|Id, context?: Resource|Context): Promise|void` - -Deletes this resource's record or data. 
Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(target: RequestTarget|Id, message: object, context?: Resource|Context): Promise|void` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: RequestTarget, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). 
- -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. - -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. 
By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include `estimatedRange` array with estimate range of the count. - -### `parsePath(path, context, query) {` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to multipart an array id). However, in some situations you may wish to preserve the path directly as a string. You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provide resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). 
The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table. We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](./transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. 
This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). For example, a complex query might look like: - -For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../developers/applications/defining-schemas) (in addition to the [schema documentation](../developers/applications/defining-schemas), see the [REST documentation](../developers/rest) for more of overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. 
An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in a referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`. - -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify an `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. 
For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results. For example, to just return an iterator of the `id`s of each object: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. 
In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization. Failing to iterate the results will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### `RequestTarget` - -The `RequestTarget` class is used to represent a URL path that can be mapped to a resource. This is used by the REST interface to map a URL path to a resource class. All REST methods are called with a `RequestTarget` as the first argument, which is used to determine which record or entry to access or modify. Methods on a `Resource` class can be called with a primary key as a string or number value as the first argument, to access or modify a record by primary key, which will work with all the default methods. The static methods will transform the primary key to a `RequestTarget` instance to call the instance methods for argument normalization. -A `RequestTarget` can be constructed with a URL path (from the REST methods). The static methods will also automatically parse the path to a `RequestTarget` instance, including parsing the search string into query parameters. 
-Below are the properties and methods of the `RequestTarget` class: - -- `pathname` - The path of the URL relative to the resource path that matched this request. This excluded the query/search string -- `toString()` - The full relative path and search string of the URL -- `search` - The search/query part the target path (the part after the first `?` character) -- `id` - The primary key of the resource, as determined by the path -- `checkPermission` - This property is set to an object indicating that a permission check should be performed on the - resource. This is used by the REST interface to determine if a user has permission to access the resource. The object - contains: - - `action` - The type of action being performed (read/write/delete) - - `resource` - The resource being accessed - - `user` - The user requesting access - -`RequestTarget` is subclass of `URLSearchParams`, and these methods are available for accessing and modifying the query parameters: - -- `get(name: string)` - Get the value of the query parameter with the specified name -- `getAll(name: string)` - Get all the values of the query parameter with the specified name -- `set(name: string, value: string)` - Set the value of the query parameter with the specified name -- `append(name: string, value: string)` - Append the value to the query parameter with the specified name -- `delete(name: string)` - Delete the query parameter with the specified name -- `has(name: string)` - Check if the query parameter with the specified name exists - -In addition, the `RequestTarget` class is an iterable, so you can iterate through the query parameters: - -- `for (let [name, value] of target)` - Iterate through the query parameters - -When a `RequestTarget` has query parameters using Harper's extended query syntax, the REST static methods will parse the `RequestTarget` and potentially add any of the following properties if they are present in the query: - -- `conditions` - An array of conditions that will be 
used to filter the query results -- `limit` - The limit of the number of records to return -- `offset` - The number of records to skip before returning the results -- `sort` - The sort order of the query results -- `select` - The properties to return in the query results - -### `RecordObject` - -The `get` method will return a `RecordObject` instance, which is an object containing all the properties of the record. Any property on the record can be directly accessed and the properties can be enumerated with standard JS capabilities like `for`-`in` and `Object.keys`. The `RecordObject` instance will also have the following methods: - -- `getUpdatedTime()` - Get the last updated time (the version number) of the record -- `getExpiresAt()` - Get the expiration time of the entry, if there is one. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, you can interact through standard CRUD/REST methods to create, read, update, and delete records. You can idiomatic property access and modification to interact with the records themselves. 
For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our own `get()` we can interact with the record: - -```javascript -export class CustomProduct extends Product { - async get(target) { - let record = await super.get(target); - let name = record.name; // this is the name of the current product - let rating = record.rating; // this is the rating of the current product - // we can't directly modify the record (it is frozen), but we can copy if we want to return a modification - record = { ...record, rating: 3 }; - return record; - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -// if we want to update a single property: -await Product.patch(1, { rating: 3 }); -``` - -When running inside a transaction, we can use the `update` method and updates are automatically saved when a request completes: - -```javascript -export class CustomProduct extends Product { - post(target, data) { - let record = this.update(target); - record.name = data.name; - record.description = data.description; - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns. 
For example, we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand - variations: [Variation] -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(target, data) { - let record = this.update(target); - let brandName = record.brand.name; - let firstVariationPrice = record.variations[0].price; - let additionalInfoOnBrand = record.brand.additionalInfo; // not defined in schema, but can still try to access property - // make some changes - record.variations.splice(0, 1); // remove first variation - record.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - record.brand.name = 'new brand name'; - // all these changes will be saved - } -} -``` - -If you need to delete a property, you can do so with the `delete` method: - -```javascript -let product1 = await Product.update(1); -product1.delete('additionalInformation'); -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes. For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). 
For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.7/reference/resources/instance-binding.md b/versioned_docs/version-4.7/reference/resources/instance-binding.md deleted file mode 100644 index 5c507e32..00000000 --- a/versioned_docs/version-4.7/reference/resources/instance-binding.md +++ /dev/null @@ -1,721 +0,0 @@ ---- -title: Resource Class with Resource Instance Binding behavior ---- - -# Resource Class with Resource Instance Binding behavior - -This document describes the legacy instance binding behavior of the Resource class. It is recommended that you use the [updated behavior of the Resource API](./) instead, but this legacy API is preserved for backwards compatibility. 
- -## Resource Class - -```javascript -export class MyExternalData extends Resource { - static loadAsInstance = true; - async get() { - // fetch data from an external source, using our id - let response = await this.fetch(this.id); - // do something with the response - } - put(data) { - // send the data into the external source - } - delete() { - // delete an entity in the external data source - } - subscribe(options) { - // if the external data source is capable of real-time notification of changes, can subscribe - } -} -// we can export this class from resources.json as our own endpoint, or use this as the source for -// a Harper data to store and cache the data coming from this data source: -tables.MyCache.sourcedFrom(MyExternalData); -``` - -You can also extend table classes in the same way, overriding the instance methods for custom functionality. The `tables` object is a global variable in the Harper JavaScript environment, along with `Resource`: - -```javascript -export class MyTable extends tables.MyTable { - get() { - // we can add properties or change properties before returning data: - this.newProperty = 'newValue'; - this.existingProperty = 44; - return super.get(); // returns the record, modified with the changes above - } - put(data) { - // can change data any way we want - super.put(data); - } - delete() { - super.delete(); - } - post(data) { - // providing a post handler (for HTTP POST requests) is a common way to create additional - // actions that aren't well described with just PUT or DELETE - } -} -``` - -Make sure that if are extending and `export`ing your table with this class, that you remove the `@export` directive in your schema, so that you aren't exporting the same table/class name twice. 
- -All Resource methods that are called from HTTP methods may directly return data or may return a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) object or an object with `headers` and a `status` (HTTP status code), to explicitly return specific headers and status code. - -## Global Variables - -### `tables` - -This is an object with all the tables in the default database (the default database is "data"). Each table that has been declared or created will be available as a (standard) property on this object, and the value will be the table class that can be used to interact with that table. The table classes implement the Resource API. - -### `databases` - -This is an object with all the databases that have been defined in Harper (in the running instance). Each database that has been declared or created will be available as a (standard) property on this object. The property values are an object with the tables in that database, where each property is a table, like the `tables` object. In fact, `databases.data === tables` should always be true. - -### `Resource` - -This is the Resource base class. This can be directly extended for custom resources, and is the base class for all tables. - -### `server` - -This object provides extension points for extension components that wish to implement new server functionality (new protocols, authentication, etc.). See the [extensions documentation for more information](../components/extensions). - -### `transaction` - -This provides a function for starting transactions. See the [transactions documentation](../transactions) for more information. - -### `contentTypes` - -This provides an interface for defining new content type handlers. See the content type extensions documentation for more information. 
- -### TypeScript Support - -While these objects/methods are all available as global variables, it is easier to get TypeScript support (code assistance, type checking) for these interfaces by explicitly `import`ing them. This can be done by setting up a package link to the main Harper package in your app: - -``` -# you may need to go to your harper directory and set it up as a link first -npm link harperdb -``` - -And then you can import any of the main Harper APIs you will use, and your IDE should understand the full typings associated with them: - -``` -import { databases, tables, Resource } from 'harperdb'; -``` - -## Resource Class (Instance) Methods - -### Properties/attributes declared in schema - -Properties that have been defined in your table's schema can be accessed and modified as direct properties on the Resource instances. - -### `get(queryOrProperty?)`: Resource|AsyncIterable - -This is called to return the record or data for this resource, and is called by HTTP GET requests. This may be optionally called with a `query` object to specify a query should be performed, or a string to indicate that the specified property value should be returned. When defining Resource classes, you can define or override this method to define exactly what should be returned when retrieving a record. The default `get` method (`super.get()`) returns the current record as a plain object. - -The query object can be used to access any query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -get(query) { - // note that query will only exist (as an object) if there is a query string - let param1 = query?.get?.('param1'); // returns 'value' - let id = this.getId(); // returns 'some-id' - ... -} -``` - -If `get` is called for a single record (for a request like `/Table/some-id`), the default action is to return `this` instance of the resource. 
If `get` is called on a collection (`/Table/?name=value`), the default action is to `search` and return an AsyncIterable of results. - -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.get(query)` performs a `get` on this specific record/resource, not on the whole table. If you wish to access a _different_ record, you should use the static `get` method on the table class, like `Table.get(otherId, context)`. - -### `search(query: Query)`: AsyncIterable - -This performs a query on this resource, searching for records that are descendants. By default, this is called by `get(query)` from a collection resource. When this is called for the root resource (like `/Table/`) it searches through all records in the table. However, if you call search from an instance with a specific ID like `1` from a path like `Table/1`, it will only return records that are descendants of that record, like `[1, 1]` (path of Table/1/1) and `[1, 2]` (path of Table/1/2). If you want to do a standard search of the table, make you call the static method like `Table.search(...)`. You can define or override this method to define how records should be queried. The default `search` method on tables (`super.search(query)`) will perform a query and return an AsyncIterable of results. The query object can be used to specify the desired query. - -### `getId(): string|number|Array` - -Returns the primary key value for this resource. - -### `put(data: object, query?: Query): Resource|void|Response` - -This will assign the provided record or data to this resource, and is called for HTTP PUT requests. You can define or override this method to define how records should be updated. The default `put` method on tables (`super.put(data)`) writes the record to the table (updating or inserting depending on if the record previously existed) as part of the current transaction for the resource instance. 
- -It is important to note that `this` is the resource instance for a specific record, specified by the primary key. Therefore, calling `super.put(data)` updates this specific record/resource, not another records in the table. If you wish to update a _different_ record, you should use the static `put` method on the table class, like `Table.put(data, context)`. - -The `query` argument is used to represent any additional query parameters that were included in the URL. For example, with a request to `/my-resource/some-id?param1=value`, we can access URL/request information: - -```javascript -put(data, query) { - let param1 = query?.get?.('param1'); // returns 'value' - ... -} -``` - -### `patch(data: object): Resource|void|Response` - -### `patch(data: object, query?: Query)` - -This will update the existing record with the provided data's properties, and is called for HTTP PATCH requests. You can define or override this method to define how records should be updated. The default `patch` method on tables (`super.patch(data)`) updates the record. The properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `data` object. This is performed as part of the current transaction for the resource instance. The `query` argument is used to represent any additional query parameters that were included. - -### `update(data: object, fullUpdate: boolean?)` - -This is called by the default `put` and `patch` handlers to update a record. `put` calls with `fullUpdate` as `true` to indicate a full record replacement (`patch` calls it with the second argument as `false`). Any additional property changes that are made before the transaction commits will also be persisted. - -### `delete(queryOrProperty?): Resource|void|Response` - -This will delete this record or resource, and is called for HTTP DELETE requests. 
You can define or override this method to define how records should be deleted. The default `delete` method on tables (`super.delete()`) deletes the record from the table as part of the current transaction. - -### `publish(message): Resource|void|Response` - -This will publish a message to this resource, and is called for MQTT publish commands. You can define or override this method to define how messages should be published. The default `publish` method on tables (`super.publish(message)`) records the published message as part of the current transaction; this will not change the data in the record but will notify any subscribers to the record/topic. - -### `post(data: object, query?: Query): Resource|void|Response` - -This is called for HTTP POST requests. You can define this method to provide your own implementation of how POST requests should be handled. Generally `POST` provides a generic mechanism for various types of data updates, and is a good place to define custom functionality for updating records. The default behavior is to create a new record/resource. The `query` argument is used to represent any additional query parameters that were included. - -### `invalidate()` - -This method is available on tables. This will invalidate the current record in the table. This can be used with a caching table and is used to indicate that the source data has changed, and the record needs to be reloaded when next accessed. - -### `subscribe(subscriptionRequest: SubscriptionRequest): Promise` - -This will subscribe to the current resource, and is called for MQTT subscribe commands. You can define or override this method to define how subscriptions should be handled. The default `subscribe` method on tables (`super.subscribe(subscriptionRequest)`) will set up a listener that will be called for any changes or published messages to this resource. - -The returned (promise resolves to) Subscription object is an `AsyncIterable` that you can iterate through with a `for await` loop. 
It also has a `queue` property which holds (an array of) any messages that are ready to be delivered immediately (if you have specified a start time, previous count, or there is a message for the current or "retained" record, these may be immediately returned). - -The `SubscriptionRequest` object supports the following properties (all optional): - -- `includeDescendants` - If this is enabled, this will create a subscription to all the record updates/messages that are prefixed with the id. For example, a subscription request of `{id:'sub', includeDescendants: true}` would return events for any update with an id/topic of the form sub/\* (like `sub/1`). -- `startTime` - This will begin the subscription at a past point in time, returning all updates/messages since the start time (a catch-up of historical messages). This can be used to resume a subscription, getting all messages since the last subscription. -- `previousCount` - This specifies the number of previous updates/messages to deliver. For example, `previousCount: 10` would return the last ten messages. Note that `previousCount` can not be used in conjunction with `startTime`. -- `omitCurrent` - Indicates that the current (or retained) record should _not_ be immediately sent as the first update in the subscription (if no `startTime` or `previousCount` was used). By default, the current record is sent as the first update. - -### `connect(incomingMessages?: AsyncIterable, query?: Query): AsyncIterable` - -This is called when a connection is received through WebSockets or Server Sent Events (SSE) to this resource path. This is called with `incomingMessages` as an iterable stream of incoming messages when the connection is from WebSockets, and is called with no arguments when the connection is from a SSE connection. This can return an asynchronous iterable representing the stream of messages to be sent to the client. 
- -### `set(property, value)` - -This will assign the provided value to the designated property in the resource's record. During a write operation, this will indicate that the record has changed and the changes will be saved during commit. During a read operation, this will modify the copy of the record that will be serialized during serialization (converted to the output format of JSON, MessagePack, etc.). - -### `allowCreate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to create the current resource. This is called as part of external incoming requests (HTTP). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's insert permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for creation. - -### `allowRead(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to read from the current resource. This is called as part of external incoming requests (HTTP GET). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's read permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `allowUpdate(user: any, data: Promise, context: Context): boolean | Promise` - -This is called to determine if the user has permission to update the current resource. This is called as part of external incoming requests (HTTP PUT). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's update permission to the table. 
The allow method may be asynchronous and return a promise that resolves to a boolean, and may await the `data` promise to determine if the data is valid for the update. - -### `allowDelete(user: any, query: Map | void, context: Context): boolean | Promise` - -This is called to determine if the user has permission to delete the current resource. This is called as part of external incoming requests (HTTP DELETE). The default behavior for a generic resource is that this requires super-user permission and the default behavior for a table is to check the user's role's delete permission to the table. The allow method may be asynchronous and return a promise that resolves to a boolean. - -### `addTo(property, value)` - -This adds the provided value to the specified property using conflict-free data type (CRDT) incrementation. This ensures that even if multiple calls are simultaneously made to increment a value, the resulting merge of data changes from different threads and nodes will properly sum all the added values. - -### `getUpdatedTime(): number` - -This returns the last updated time of the resource (timestamp of last commit). This is returned as milliseconds from epoch. - -### `wasLoadedFromSource(): boolean` - -Indicates if the record had been loaded from source. When using caching tables, this indicates that there was a cache miss and the data had to be loaded from the source (or waiting on an inflight request from the source to finish). - -### `getContext(): Context` - -Returns the context for this resource. The context contains information about the current transaction, the user that initiated this action, and other metadata that should be retained through the life of an action. - -#### `Context` - -The `Context` object has the following (potential) properties: - -- `user` - This is the user object, which includes information about the username, role, and authorizations.
-- `transaction` - The current transaction. If the current method was triggered by an HTTP request, the following properties are available: -- `lastModified` - This value is used to indicate the last modified or updated timestamp of any resource(s) that are accessed and will inform the response's `ETag` (or `Last-Modified`) header. This can be updated by application code if it knows that modification should cause this timestamp to be updated. - -When a resource gets a request through HTTP, the request object is the context, which has the following properties: - -- `url` - The local path/URL of the request (this will not include the protocol or host name, but will start at the path and includes the query string). -- `method` - The method of the HTTP request. -- `headers` - This is an object with the headers that were included in the HTTP request. You can access headers by calling `context.headers.get(headerName)`. -- `responseHeaders` - This is an object with the headers that will be included in the HTTP response. You can set headers by calling `context.responseHeaders.set(headerName, value)`. -- `pathname` - This provides the path part of the URL (no querystring). -- `host` - This provides the host name of the request (from the `Host` header). -- `ip` - This provides the ip address of the client that made the request. -- `body` - This is the request body as a raw NodeJS Readable stream, if there is a request body. -- `data` - If the HTTP request had a request body, this provides a promise to the deserialized data from the request body. (Note that for methods that normally have a request body like `POST` and `PUT`, the resolved deserialized data is passed in as the main argument, but accessing the data from the context provides access to this for requests that do not traditionally have a request body like `DELETE`).
- -When a resource is accessed as a data source: - -- `requestContext` - For resources that are acting as a data source for another resource, this provides access to the context of the resource that is making a request for data from the data source resource. Note that it is generally not recommended to rely on this context. The resolved data may be used to fulfill many different requests, and relying on this first request context may not be representative of future requests. Also, source resolution may be triggered by various actions, not just specified endpoints (for example queries, operations, studio, etc.), so make sure you are not relying on specific request context information. - -### `operation(operationObject: Object, authorize?: boolean): Promise` - -This method is available on tables and will execute a Harper operation, using the current table as the target of the operation (the `table` and `database` do not need to be specified). See the [operations API](../../developers/operations-api/) for available operations that can be performed. You can set the second argument to `true` if you want the current user to be checked for authorization for the operation (if `true`, will throw an error if they are not authorized). - -### `allowStaleWhileRevalidate(entry: { version: number, localTime: number, expiresAt: number, value: object }, id): boolean` - -For caching tables, this can be defined to allow stale entries to be returned while revalidation is taking place, rather than waiting for revalidation. The `version` is the timestamp/version from the source, the `localTime` is when the resource was last refreshed, the `expiresAt` is when the resource expired and became stale, and the `value` is the last value (the stale value) of the record/resource. All times are in milliseconds since epoch. Returning `true` will allow the current stale value to be returned while revalidation takes place concurrently.
Returning `false` will cause the response to wait for the data source or origin to revalidate or provide the latest value first, and then return the latest value. - -## Resource Static Methods and Properties - -The Resource class also has static methods that mirror the instance methods with an initial argument that is the id of the record to act on. The static methods are generally the preferred and most convenient method for interacting with tables outside of methods that are directly extending a table. Whereas instances methods are bound to a specific record, the static methods allow you to specify any record in the table to act on. - -The `get`, `put`, `delete`, `publish`, `subscribe`, and `connect` methods all have static equivalents. There is also a `static search()` method for specifically handling searching a table with query parameters. By default, the Resource static methods default to creating an instance bound to the record specified by the arguments, and calling the instance methods. Again, generally static methods are the preferred way to interact with resources and call them from application code. These methods are available on all user Resource classes and tables. - -### `get(id: Id, context?: Resource|Context)` - -This will retrieve a resource instance by id. For example, if you want to retrieve comments by id in the retrieval of a blog post you could do: - -```javascript -const { MyTable, Comment } = tables; -... -// in class: - async get() { - for (let commentId of this.commentIds) { - let comment = await Comment.get(commentId, this); - // now you can do something with the comment record - } - } -``` - -Type definition for `Id`: - -```typescript -Id = string | number | array; -``` - -### `get(query: Query, context?: Resource|Context)` - -This can be used to retrieve a resource instance by a query. 
The query can be used to specify a single/unique record by an `id` property, and can be combined with a `select`: - -```javascript -MyTable.get({ id: 34, select: ['name', 'age'] }); -``` - -This method may also be used to retrieve a collection of records by a query. If the query is not for a specific record id, this will call the `search` method, described above. - -### `put(id: Id, record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same `id` (primary key). - -### `put(record: object, context?: Resource|Context): Promise` - -This will save the provided record or data to this resource. This will create a new record or fully replace an existing record if one exists with the same primary key provided in the record. If your table doesn't have a primary key attribute, you will need to use the method with the `id` argument. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `create(record: object, context?: Resource|Context): Promise` - -This will create a new record using the provided record for all fields (except primary key), generating a new primary key for the record. This does _not_ check for an existing record; the record argument should not have a primary key and should use the generated primary key. This will (asynchronously) return the new resource instance. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `post(id: Id, data: object, context?: Resource|Context): Promise` - -### `post(data: object, context?: Resource|Context): Promise` - -This will save the provided data to this resource. By default, this will create a new record (by calling `create`). 
However, the `post` method is specifically intended to be available for custom behaviors, so extending a class to support custom `post` method behavior is encouraged. - -### `patch(recordUpdate: object, context?: Resource|Context): Promise` - -### `patch(id: Id, recordUpdate: object, context?: Resource|Context): Promise` - -This will save the provided updates to the record. The `recordUpdate` object's properties will be applied to the existing record, overwriting the existing records properties, and preserving any properties in the record that are not specified in the `recordUpdate` object. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `delete(id: Id, context?: Resource|Context): Promise` - -Deletes this resource's record or data. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `publish(message: object, context?: Resource|Context): Promise` - -### `publish(topic: Id, message: object, context?: Resource|Context): Promise` - -Publishes the given message to the record entry specified by the id in the context. Make sure to `await` this function to ensure it finishes execution within the surrounding transaction. - -### `subscribe(subscriptionRequest?, context?: Resource|Context): Promise` - -Subscribes to a record/resource. See the description of the `subscriptionRequest` object above for more information on how to use this. - -### `search(query: Query, context?: Resource|Context): AsyncIterable` - -This will perform a query on this table or collection. The query parameter can be used to specify the desired query. - -### `setComputedAttribute(name: string, computeFunction: (record: object) => any)` - -This will define the function to use for a computed attribute. To use this, the attribute must be defined in the schema as a computed attribute. 
The `computeFunction` will be called with the record as an argument and should return the computed value for the attribute. For example: - -```javascript -MyTable.setComputedAttribute('computedAttribute', (record) => { - return record.attribute1 + record.attribute2; -}); -``` - -For a schema like: - -```graphql -type MyTable @table { - id: ID @primaryKey - attribute1: Int - attribute2: Int - computedAttribute: Int @computed -} -``` - -See the [schema documentation](../../developers/applications/defining-schemas) for more information on computed attributes. - -### `primaryKey` - -This property indicates the name of the primary key attribute for a table. You can get the primary key for a record using this property name. For example: - -```javascript -let record34 = await Table.get(34); -record34[Table.primaryKey] -> 34 -``` - -There are additional methods that are only available on table classes (which are a type of resource). - -### `Table.sourcedFrom(Resource, options)` - -This defines the source for a table. This allows a table to function as a cache for an external resource. When a table is configured to have a source, any request for a record that is not found in the table will be delegated to the source resource to retrieve (via `get`) and the result will be cached/stored in the table. All writes to the table will also first be delegated to the source (if the source defines write functions like `put`, `delete`, etc.). The `options` parameter can include an `expiration` property that will configure the table with a time-to-live expiration window for automatic deletion or invalidation of older entries. The `options` parameter (also) supports: - -- `expiration` - Default expiration time for records in seconds. -- `eviction` - Eviction time for records in seconds. -- `scanInterval` - Time period for scanning the table for records to evict. 
- -If the source resource implements subscription support, real-time invalidation can be performed to ensure the cache is guaranteed to be fresh (and this can eliminate or reduce the need for time-based expiration of data). - -### `directURLMapping` - -This property can be set to force the direct URL request target to be mapped to the resource primary key. Normally, URL resource targets are parsed, where the path is mapped to the primary key of the resource (and decoded using standard URL decoding), and any query string parameters are used to query that resource. But if this is turned on, the full URL is used as the primary key. For example: - -```javascript -export class MyTable extends tables.MyTable { - static directURLMapping = true; -} -``` - -```http request -GET /MyTable/test?foo=bar -``` - -This will be mapped to the resource with a primary key of `test?foo=bar`, and no querying will be performed on that resource. - -### `getRecordCount({ exactCount: boolean })` - -This will return the number of records in the table. By default, this will return an approximate count of records, which is fast and efficient. If you want an exact count, you can pass `{ exactCount: true }` as the first argument, but this will be slower and more expensive. The return value will be a Promise that resolves to an object with a `recordCount` property, which is the number of records in the table. If this was not an exact count, it will also include an `estimatedRange` array with an estimated range of the count. - -### `parsePath(path, context, query)` - -This is called by static methods when they are responding to a URL (from HTTP request, for example), and translates the path to an id. By default, this will parse `.property` suffixes for accessing properties and specifying preferred content type in the URL (and for older tables it will convert a multi-segment path to a multipart array id). However, in some situations you may wish to preserve the path directly as a string.
You can override `parsePath` for simpler path to id preservation: - -```javascript - static parsePath(path) { - return path; // return the path as the id - } -``` - -### `getRecordCount: Promise<{}>` - -### `isCollection(resource: Resource): boolean` - -This returns a boolean indicating if the provided resource instance represents a collection (can return a query result) or a single record/entity. - -### Context and Transactions - -Whenever you implement an action that is calling other resources, it is recommended that you provide the "context" for the action. This allows a secondary resource to be accessed through the same transaction, preserving atomicity and isolation. - -This also allows timestamps that are accessed during resolution to be used to determine the overall last updated timestamp, which informs the header timestamps (which facilitates accurate client-side caching). The context also maintains user, session, and request metadata information that is communicated so that contextual request information (like headers) can be accessed and any writes are properly attributed to the correct user, or any additional security checks to be applied to the user. - -When using an export resource class, the REST interface will automatically create a context for you with a transaction and request metadata, and you can pass this to other actions by simply including `this` as the source argument (second argument) to the static methods. - -For example, if we had a method to post a comment on a blog, and when this happens we also want to update an array of comment IDs on the blog record, but then add the comment to a separate comment table.
We might do this: - -```javascript -const { Comment } = tables; - -export class BlogPost extends tables.BlogPost { - post(comment) { - // add a comment record to the comment table, using this resource as the source for the context - Comment.put(comment, this); - this.comments.push(comment.id); // add the id for the record to our array of comment ids - // Both of these actions will be committed atomically as part of the same transaction - } -} -``` - -Please see the [transaction documentation](../transactions) for more information on how transactions work in Harper. - -### Query - -The `get`/`search` methods accept a Query object that can be used to specify a query for data. The query is an object that has the following properties, which are all optional: - -#### `conditions` - -This is an array of objects that specify the conditions to use the match records (if conditions are omitted or it is an empty array, this is a search for everything in the table). Each condition object can have the following properties: - -- `attribute`: Name of the property/attribute to match on. -- `value`: The value to match. -- `comparator`: This can specify how the value is compared. This defaults to "equals", but can also be "greater_than", "greater_than_equal", "less_than", "less_than_equal", "starts_with", "contains", "ends_with", "between", and "not_equal". -- `conditions`: An array of conditions, which follows the same structure as above. -- `operator`: Specifies the operator to apply to this set of conditions (`and` or `or`. This is optional and defaults to `and`). 
For example, a more complex query might look like: - -```javascript -Table.search({ - conditions: [ - { attribute: 'price', comparator: 'less_than', value: 100 }, - { - operator: 'or', - conditions: [ - { attribute: 'rating', comparator: 'greater_than', value: 4 }, - { attribute: 'featured', value: true }, - ], - }, - ], -}); -``` - -**Chained Attributes/Properties** - -Chained attribute/property references can be used to search on properties within related records that are referenced by [relationship properties](../../developers/applications/defining-schemas) (in addition to the [schema documentation](../../developers/applications/defining-schemas), see the [REST documentation](../../developers/rest) for more of an overview of relationships and querying). Chained property references are specified with an array, with each entry in the array being a property name for successive property references. For example, if a relationship property called `brand` has been defined that references a `Brand` table, we could search products by brand name: - -```javascript -Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); -``` - -This effectively executes a join, searching on the `Brand` table and joining results with matching records in the `Product` table. Chained array properties can be used in any condition, as well as nested/grouped conditions. The chain of properties may also be more than two entries, allowing for multiple relationships to be traversed, effectively joining across multiple tables. An array of chained properties can also be used as the `attribute` in the `sort` property, allowing for sorting by an attribute in referenced joined tables. - -#### `operator` - -Specifies if the conditions should be applied as an `"and"` (records must match all conditions), or as an "or" (records must match at least one condition). This is optional and defaults to `"and"`.
- -#### `limit` - -This specifies the limit of the number of records that should be returned from the query. - -#### `offset` - -This specifies the number of records that should be skipped prior to returning records in the query. This is often used with `limit` to implement "paging" of records. - -#### `select` - -This specifies the specific properties that should be included in each record that is returned. This can be an array, to specify a set of properties that should be included in the returned objects. The array can specify a `select.asArray = true` property and the query results will return a set of arrays of values of the specified properties instead of objects; this can be used to return more compact results. Each of the elements in the array can be a property name, or can be an object with a `name` and `select` array itself that specifies properties that should be returned by the referenced sub-object or related record. For example, a `select` can be defined: - -```javascript -Table.search({ select: [ 'name', 'age' ], conditions: ...}) -``` - -Or nested/joined properties from referenced objects can be specified, here we are including the referenced `related` records, and returning the `description` and `id` from each of the related objects: - -```javascript -Table.search({ select: [ 'name', { name: 'related', select: ['description', 'id'] } ], conditions: ...}) -``` - -The select properties can also include certain special properties: - -- `$id` - This will specifically return the primary key of the record (regardless of name, even if there is no defined primary key attribute for the table). -- `$updatedtime` - This will return the last updated timestamp/version of the record (regardless of whether there is an attribute for the updated time). - -Alternately, the select value can be a string value, to specify that the value of the specified property should be returned for each iteration/element in the results.
For example to just return an iterator of the `id`s of objects: - -```javascript -Table.search({ select: 'id', conditions: ...}) -``` - -#### `sort` - -This defines the sort order, and should be an object that can have the following properties: - -- `attribute`: The attribute to sort on. -- `descending`: If true, will sort in descending order (optional and defaults to `false`). -- `next`: Specifies the next sort order to resolve ties. This is an object that follows the same structure as `sort`. - -#### `explain` - -This will return the conditions re-ordered as Harper will execute them. Harper will estimate the number of the matching records for each condition and apply the narrowest condition first. - -#### `enforceExecutionOrder` - -This will force the conditions to be executed in the order they were supplied, rather than using query estimation to re-order them. - -The query results are returned as an `AsyncIterable`. In order to access the elements of the query results, you must use a `for await` loop (it does _not_ return an array, you can not access the results by index). - -For example, we could do a query like: - -```javascript -let { Product } = tables; -let results = Product.search({ - conditions: [ - { attribute: 'rating', value: 4.5, comparator: 'greater_than' }, - { attribute: 'price', value: 100, comparator: 'less_than' }, - ], - offset: 20, - limit: 10, - select: ['id', 'name', 'price', 'rating'], - sort: { attribute: 'price' }, -}); -for await (let record of results) { - // iterate through each record in the query results -} -``` - -`AsyncIterable`s can be returned from resource methods, and will be properly serialized in responses. When a query is performed, this will open/reserve a read transaction until the query results are iterated, either through your own `for await` loop or through serialization.
Failing to iterate the results will result in a long-lived read transaction which can degrade performance (including write performance), and may eventually be aborted. - -### Interacting with the Resource Data Model - -When extending or interacting with table resources, when a resource instance is retrieved and instantiated, it will be loaded with the record data from its table. You can interact with this record through the resource instance. For any properties that have been defined in the table's schema, you can directly access or modify properties through standard property syntax. For example, let's say we defined a product schema: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float -} -``` - -If we have extended this table class with our get() we can interact with any of these specified attributes/properties: - -```javascript -export class CustomProduct extends Product { - get(query) { - let name = this.name; // this is the name of the current product - let rating = this.rating; // this is the rating of the current product - this.rating = 3; // we can also modify the rating for the current instance - // (with a get this won't be saved by default, but will be used when serialized) - return super.get(query); - } -} -``` - -Likewise, we can interact with resource instances in the same way when retrieving them through the static methods: - -```javascript -let product1 = await Product.get(1); -let name = product1.name; // this is the name of the product with a primary key of 1 -let rating = product1.rating; // this is the rating of the product with a primary key of 1 -product1.rating = 3; // modify the rating for this instance (this will be saved without a call to update()) -``` - -If there are additional properties on (some) products that aren't defined in the schema, we can still access them through the resource instance, but since they aren't declared, there won't be a getter/setter definition for direct property
access, but we can access properties with the `get(propertyName)` method and modify properties with the `set(propertyName, value)` method: - -```javascript -let product1 = await Product.get(1); -let additionalInformation = product1.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema -product1.set('newProperty', 'some value'); // we can assign any properties we want with set -``` - -And likewise, we can do this in an instance method, although you will probably want to use super.get()/set() so you don't have to write extra logic to avoid recursion: - -```javascript -export class CustomProduct extends Product { - get(query) { - let additionalInformation = super.get('additionalInformation'); // get the additionalInformation property value even though it isn't defined in the schema - super.set('newProperty', 'some value'); // we can assign any properties we want with set - } -} -``` - -Note that you may also need to use `get`/`set` for properties that conflict with existing method names. For example, if your schema defines an attribute called `getId` (not recommended), you would need to access that property through `get('getId')` and `set('getId', value)`. - -If you want to save the changes you make, you can call the `update()` method: - -```javascript -let product1 = await Product.get(1); -product1.rating = 3; -product1.set('newProperty', 'some value'); -product1.update(); // save both of these property changes -``` - -Updates are automatically saved inside modifying methods like put and post: - -```javascript -export class CustomProduct extends Product { - post(data) { - this.name = data.name; - this.set('description', data.description); - // both of these changes will be saved automatically as this transaction commits - } -} -``` - -We can also interact with properties in nested objects and arrays, following the same patterns.
For example we could define more complex types on our product: - -```graphql -type Product @table { - id: ID @primaryKey - name: String - rating: Int - price: Float - brand: Brand - variations: [Variation] -} -type Brand { - name: String -} -type Variation { - name: String - price: Float -} -``` - -We can interact with these nested properties: - -```javascript -export class CustomProduct extends Product { - post(data) { - let brandName = this.brand.name; - let firstVariationPrice = this.variations[0].price; - let additionalInfoOnBrand = this.brand.get('additionalInfo'); // not defined in schema, but can still try to access property - // make some changes - this.variations.splice(0, 1); // remove first variation - this.variations.push({ name: 'new variation', price: 9.99 }); // add a new variation - this.brand.name = 'new brand name'; - // all these changes will be saved - } -} -``` - -If you need to delete a property, you can do so with the `delete` method: - -```javascript -let product1 = await Product.get(1); -product1.delete('additionalInformation'); -product1.update(); -``` - -You can also get a "plain" object representation of a resource instance by calling `toJSON`, which will return a simple frozen object with all the properties (whether defined in the schema) as direct normal properties (note that this object can _not_ be modified, it is frozen since it belongs to a cache): - -```javascript -let product1 = await Product.get(1); -let plainObject = product1.toJSON(); -for (let key in plainObject) { - // can iterate through the properties of this record -} -``` - -## Response Object - -The resource methods can return an object that will be serialized and returned as the response to the client. However, these methods can also return a `Response` style object with `status`, `headers`, and optionally `body` or `data` properties. This allows you to have more control over the response, including setting custom headers and status codes.
For example, you could return a redirect response like: - -```javascript -return { status: 302, headers: { Location: '/new-location' } }; -``` - -If you include a `body` property, this must be a string or buffer that will be returned as the response body. If you include a `data` property, this must be an object that will be serialized as the response body (using the standard content negotiation). For example, we could return an object with a custom header: - -```javascript -return { status: 200, headers: { 'X-Custom-Header': 'custom value' }, data: { message: 'Hello, World!' } }; -``` - -### Throwing Errors - -You may throw errors (and leave them uncaught) from the response methods and these should be caught and handled by the protocol handler. For REST requests/responses, this will result in an error response. By default the status code will be 500. You can assign a property of `statusCode` to errors to indicate the HTTP status code that should be returned. For example: - -```javascript -if (notAuthorized()) { - let error = new Error('You are not authorized to access this'); - error.statusCode = 403; - throw error; -} -``` diff --git a/versioned_docs/version-4.7/reference/resources/migration.md b/versioned_docs/version-4.7/reference/resources/migration.md deleted file mode 100644 index 51ec4c83..00000000 --- a/versioned_docs/version-4.7/reference/resources/migration.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: Migration to Resource API version 2 (non-instance binding) ---- - -# Migration to Resource API version 2 (non-instance binding) - -The Resource API was inspired by two major design ideas: the REST architectural design and the [Active Record pattern](https://en.wikipedia.org/wiki/Active_record_pattern) (made popular by Ruby on Rails and heavily used as a pattern in many ORMs).
The basic design goal of the Resource API is to integrate these concepts into a single construct that can directly map RESTful methods (specifically the "uniform interface" of HTTP) to an active record data model. However, while the active record pattern has been for _consumption_ of data, implementing methods for endpoint definitions and caching sources as a data _provider_ can be confusing and cumbersome to implement. The updated non-instance binding Resource API is designed to make it easier and more consistent to implement a data provider and interact with records across a table, while maintaining more explicit control over what data is loaded and when. - -The updated Resource API is enabled on a per-class basis by setting static `loadAsInstance` property to `false`. When this property is set to `false`, this means that the Resource instances will not be bound to a specific record. Instead instances represent the whole table, capturing the context and current transactional state. Any records in the table can be loaded or modified from `this` instance. There are a number of implications and different behaviors from a Resource class with `static loadAsInstance = false`: - -- The `get` method (both static and instance) will directly return the record, a frozen enumerable object with direct properties, instead of a Resource instance. -- When instance methods are called, there will not be any record preloaded beforehand and the resource instance will not have properties mapped to a record. -- All instance methods accept a `target`, an instance of `RequestTarget`, as the first argument, which identifies the target record or query. - - The `target` will have an `id` property identifying the target resource, along with target information. - - The `getId()` method is no longer used and will return `undefined`. - - The `target` will provide access to query parameters, search operators, and other directives. 
A `checkPermissions` property on the `target` indicates that a method should check the permissions of the request before proceeding.
- -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - async get(query) { - let id = this.getId(); // get the id - if (query?.size > 0) { - // check number of query parameters - let idWithQuery = id + query.toString(); // add query parameters - let resource = await tables.MyData.get(idWithQuery, this); // retrieve another record - resource.newProperty = 'value'; // assign a new value to the returned resource instance - return resource; - } else { - this.newProperty = 'value'; // assign a new value to this instance - return super.get(query); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - let id = target.id; // get the id - let record; - if (target.size > 0) { - // check number of query parameters - let idWithQuery = target.toString(); // this is the full target with the path query parameters - // we can retrieve another record from this table directly with this.get/super.get or with tables.MyData.get - record = await super.get(idWithQuery); - } else { - record = await super.get(target); // we can just directly use the target as well - } - // the record itself is frozen, but we can copy/assign to a new object with additional properties if we want - return { ...record, newProperty: 'value' }; - } -} -``` - -Here is an example of the preferred approach for authorization: -Previous code with a `get` method: - -```javascript -export class MyData extends tables.MyData { - allowRead(user) { - // allow any authenticated user - return user ? 
true : false; - } - async get(query) { - // any get logic - return super.get(query); - } -} -``` - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - async get(target) { - // While you can still use allowRead, it is not called before get is called, and it is generally encouraged - // to perform/call authorization explicitly in direct get, put, post methods rather than using allow* methods. - if (!this.getContext().user) throw new Error('Unauthorized'); - target.checkPermissions = false; // authorization complete, no need to further check permissions below - // target.checkPermissions is set to true or left in place, this default get method will perform the default permissions checks - return super.get(target); // we can just directly use the query as well - } -} -``` - -Here is an example of how to convert/upgrade an implementation of a `post` method: -Previous code with a `post` method: - -```javascript -export class MyData extends tables.MyData { - async post(data, query) { - let resource = await tables.MyData.get(data.id, this); - if (resource) { - // update a property - resource.someProperty = 'value'; - // or - tables.MyData.patch(data.id, { someProperty: 'value' }, this); - } else { - // create a new record - MyData.create(data, this); - } - } -} -``` - -Updated code: - -```javascript -export class MyData extends tables.MyData { - static loadAsInstance = false; // opt in to updated behavior - // IMPORTANT: arguments are reversed: - async post(target, data) { - let record = await this.get(data.id); - if (record) { - // update a property - const updatable = await this.update(data.id); // we can alternately pass a target to update - updatable.someProperty = 'value'; - // or - this.patch(data.id, { someProperty: 'value' }); - } else { - // create a new record - this.create(data); - } - } -} -``` diff --git a/versioned_docs/version-4.7/reference/resources/query-optimization.md 
b/versioned_docs/version-4.7/reference/resources/query-optimization.md deleted file mode 100644 index 139b862b..00000000 --- a/versioned_docs/version-4.7/reference/resources/query-optimization.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Query Optimization ---- - -## Query Optimization - -Harper has powerful query functionality with excellent performance characteristics. However, like any database, different queries can vary significantly in performance. It is important to understand how querying works to help you optimize your queries for the best performance. - -### Query Execution - -At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database and delivering the results based on required fields, relationships, and ordering. Harper supports indexed fields, and these indexes are used to speed up query execution. When conditions are specified in a query, Harper will attempt to utilize indexes to optimize the speed of query execution. When a field is not indexed, a query specifies a condition on that field, and the database check each potential record to determine if it matches the condition. - -When a query is performed with multiple conditions, Harper will attempt to optimize the ordering of these conditions. When using intersecting conditions (the default, an `and` operator, matching records must all match all conditions), Harper will attempt to to apply the most selective and performant condition first. This means that if one condition can use an index and is more selective than another, it will be used first to find the initial matching set of data and then filter based on the remaining conditions. If a condition can search an indexed field, with a selective condition, it will be used before conditions that aren't indexed, or as selective. 
The `search` method includes an `explain` flag that can be used to return a query execution order to understand how the query is being executed. This can be useful for debugging and optimizing queries. - -For a union query, each condition is executed separately and the results are combined/merged. - -### Condition, Operators, and Indexing - -When a query is performed, the conditions specified in the query are evaluated against the data in the database. The conditions can be simple or complex, and can include scalar operators such as `=`, `!=`, `>`, `<`, `>=`, `<=`, as well as `starts_with`, `contains`, and `ends_with`. The use of these operators can affect the performance of the query, especially when used with indexed fields. If an indexed field is not used, the database will have to check each potential record to determine if it matches the condition. If the only condition is not indexed, or there are no conditions with an indexed field, the database will have to check every record with a full table scan and can be very slow for large datasets (it will get slower as the dataset grows, `O(n)`). - -The use of indexed fields can significantly improve the performance of a query, providing fast performance even as the database grows in size (`O(log n)`). However, indexed fields require extra writes to the database when performing insert, update, or delete operations. This is because the index must be updated to reflect the changes in the data. This can slow down write operations, but the trade-off is often worth it if the field is frequently used in queries. - -The different operators can also affect the performance of a query. For example, using the `=` operator on an indexed field is generally faster than using the `!=` operator, as the latter requires checking all records that do not match the condition. An index is a sorted listed of values, so the greater than and less than operators will also utilize indexed fields when possible. 
If the range is narrow, these operations can be very fast. A wide range could yield a large number of records and will naturally incur more overhead. The `starts_with` operator can also leverage indexed fields because it quickly find the correct matching entries in the sorted index. On other hand, the `contains` and `ends_with` and not equal (`!=` or `not_equal`) operators can not leverage the indexes, so they will require a full table scan to find the matching records if they are not used in conjunction in with a selective/indexed condition. There is a special case of `!= null` which can use indexes to find non-null records. However, there is generally only helpful for sparse fields where a small subset are non-null values. More generally, operators are more efficient if they are selecting on fields with a high cardinality. - -Conditions can be applied to primary key fields or other indexed fields (known as secondary indexes). In general, querying on a primary key will be faster than querying on a secondary index, as the primary key is the most efficient way to access data in the database, and doesn't require cross-referencing to the main records. - -### Relationships/Joins - -Harper supports relationships between tables, allowing for "join" queries that. This does result in more complex queries with potentially larger performance overhead, as more lookups are necessary to connect matched or selected data with other tables. Similar principles apply to conditions which use relationships. Indexed fields and comparators that leverage the ordering are still valuable for performance. It is also important that if a condition on a table is connected to another table's foreign key, that that foreign key also be indexed. Likewise, if a query `select`s data from a related table that uses a foreign key to relate, that it is indexed. The same principles of higher cardinality applies here as well, more unique values allow for efficient lookups. 
- -### Sorting - -Queries can also specify a sort order. This can also significantly impact performance. If a query specifies a sort order on an indexed field, the database can use the index to quickly retrieve the data in the specified order. A sort order can be used in conjunction with a condition on the same (indexed) field can utilize the index for ordering. However, if the sort order is not on an indexed field, or the query specifies conditions on different fields, Harper will generally need to sort the data after retrieving it, which can be slow for large datasets. The same principles apply to sorting as they do to conditions. Sorting on a primary key is generally faster than sorting on a secondary index, if the condition aligns with the sort order. - -### Streaming - -One of the unique and powerful features of Harper's querying functionality is the ability to stream query results. When possible, Harper can return records from a query as they are found, rather than waiting for the entire query to complete. This can significantly improve performance for large queries, as it allows the application to start processing results or sending the initial data before the entire query is complete (improving time-to-first-byte speed, for example). However, using a sort order on a query with conditions that are not on an aligned index requires that the entire query result be loaded in order to perform the sorting, which defeats the streaming benefits. diff --git a/versioned_docs/version-4.7/reference/roles.md b/versioned_docs/version-4.7/reference/roles.md deleted file mode 100644 index 2e3dc570..00000000 --- a/versioned_docs/version-4.7/reference/roles.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Roles ---- - -# Roles - -Roles in Harper are part of the application’s role-based access control (RBAC) system. You can declare roles in your application and manage their permissions through a roles configuration file. 
When the application starts, Harper will ensure all declared roles exist with the specified permissions, updating them if necessary. - -## Configuring Roles - -Point to a roles configuration file from your application’s `config.yaml`: - -```yaml -roles: - files: roles.yaml -``` - -You can declare one or more files. Each file should define one or more roles in YAML format. - -## Roles File Structure - -A roles file (`roles.yaml`) contains role definitions keyed by role name. Each role may contain: - -- **super_user** – a boolean that grants all permissions. -- **databases** – one or more databases the role has access to. -- **tables** – within each database, table-level and attribute-level permissions. - -**Full Example** - -```yaml -: - super_user: # optional - : - : - read: - insert: - update: - delete: - attributes: - : - read: - insert: - update: -``` - -## Role Flags - -- `super_user: true` — grants full system access. -- `super_user: false` — the role only has the explicit permissions defined in the role. - -## Database and Table Permissions - -Within each role, you may specify one or more databases. Each database can declare permissions for tables. - -Example: - -```yaml -analyst: - super_user: false - data: - Sales: - read: true - insert: false - update: false - delete: false -``` - -In this example, the `analyst` role has read-only access to the `Sales` table in the `data` database. - -## Attribute-Level Permissions - -You can also grant or deny access at the attribute level within a table. - -Example: - -```yaml -editor: - data: - Articles: - read: true - insert: true - update: true - attributes: - title: - read: true - update: true - author: - read: true - update: false -``` - -Here, the `editor` role can update the `title` of an article but cannot update the `author`. 
- -## Multiple Roles - -Roles can be defined side by side in a single file: - -```yaml -reader: - super_user: false - data: - Dog: - read: true - -writer: - super_user: false - data: - Dog: - insert: true - update: true -``` - -## Behavior on Startup - -- If a declared role does not exist, Harper creates it. -- If a declared role already exists, Harper updates its permissions to match the definition. -- Roles are enforced consistently across deployments, keeping access control in sync with your application code. diff --git a/versioned_docs/version-4.7/reference/sql-guide/date-functions.md b/versioned_docs/version-4.7/reference/sql-guide/date-functions.md deleted file mode 100644 index c9747dcd..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/date-functions.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: SQL Date Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Date Functions - -Harper utilizes [Coordinated Universal Time (UTC)](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) in all internal SQL operations. This means that date values passed into any of the functions below will be assumed to be in UTC or in a format that can be translated to UTC. - -When parsing date values passed to SQL date functions in HDB, we first check for [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) formats, then for [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3) date-time format and then fall back to new Date(date_string)if a known format is not found. - -### CURRENT_DATE() - -Returns the current date in UTC in `YYYY-MM-DD` String format. 
- -``` -"SELECT CURRENT_DATE() AS current_date_result" returns - { - "current_date_result": "2020-04-22" - } -``` - -### CURRENT_TIME() - -Returns the current time in UTC in `HH:mm:ss.SSS` String format. - -``` -"SELECT CURRENT_TIME() AS current_time_result" returns - { - "current_time_result": "15:18:14.639" - } -``` - -### CURRENT_TIMESTAMP - -Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. - -``` -"SELECT CURRENT_TIMESTAMP AS current_timestamp_result" returns - { - "current_timestamp_result": 1587568845765 - } -``` - -### DATE([date_string]) - -Formats and returns the date_string argument in UTC in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -If a date_string is not provided, the function will return the current UTC date/time value in the return format defined above. - -``` -"SELECT DATE(1587568845765) AS date_result" returns - { - "date_result": "2020-04-22T15:20:45.765+0000" - } -``` - -``` -"SELECT DATE(CURRENT_TIMESTAMP) AS date_result2" returns - { - "date_result2": "2020-04-22T15:20:45.765+0000" - } -``` - -### DATE_ADD(date, value, interval) - -Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_ADD(1587568845765, 1, 'days') AS date_add_result" AND -"SELECT DATE_ADD(1587568845765, 1, 'd') AS date_add_result" both return - { - "date_add_result": 1587655245765 - } -``` - -``` -"SELECT DATE_ADD(CURRENT_TIMESTAMP, 2, 'years') -AS date_add_result2" returns - { - "date_add_result2": 1650643129017 - } -``` - -### DATE_DIFF(date_1, date_2[, interval]) - -Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. - -Accepted interval values: - -- years -- months -- weeks -- days -- hours -- minutes -- seconds - -``` -"SELECT DATE_DIFF(CURRENT_TIMESTAMP, 1650643129017, 'hours') -AS date_diff_result" returns - { - "date_diff_result": -17519.753333333334 - } -``` - -### DATE_FORMAT(date, format) - -Formats and returns a date value in the String format provided. Find more details on accepted format values in the [moment.js docs](https://momentjs.com/docs/#/displaying/format/). - -``` -"SELECT DATE_FORMAT(1524412627973, 'YYYY-MM-DD HH:mm:ss') -AS date_format_result" returns - { - "date_format_result": "2018-04-22 15:57:07" - } -``` - -### DATE_SUB(date, value, interval) - -Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. 
- -| Key | Shorthand | -| ------------ | --------- | -| years | y | -| quarters | Q | -| months | M | -| weeks | w | -| days | d | -| hours | h | -| minutes | m | -| seconds | s | -| milliseconds | ms | - -``` -"SELECT DATE_SUB(1587568845765, 2, 'years') AS date_sub_result" returns - { - "date_sub_result": 1524410445765 - } -``` - -### EXTRACT(date, date_part) - -Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" - -| date_part | Example return value\* | -| ----------- | ---------------------- | -| year | "2020" | -| month | "3" | -| day | "26" | -| hour | "15" | -| minute | "13" | -| second | "2" | -| millisecond | "41" | - -``` -"SELECT EXTRACT(1587568845765, 'year') AS extract_result" returns - { - "extract_result": "2020" - } -``` - -### GETDATE() - -Returns the current Unix Timestamp in milliseconds. - -``` -"SELECT GETDATE() AS getdate_result" returns - { - "getdate_result": 1587568845765 - } -``` - -### GET_SERVER_TIME() - -Returns the current date/time value based on the server’s timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. - -``` -"SELECT GET_SERVER_TIME() AS get_server_time_result" returns - { - "get_server_time_result": "2020-04-22T15:20:45.765+0000" - } -``` - -### OFFSET_UTC(date, offset) - -Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. - -``` -"SELECT OFFSET_UTC(1587568845765, 240) AS offset_utc_result" returns - { - "offset_utc_result": "2020-04-22T19:20:45.765+0400" - } -``` - -``` -"SELECT OFFSET_UTC(1587568845765, 10) AS offset_utc_result2" returns - { - "offset_utc_result2": "2020-04-23T01:20:45.765+1000" - } -``` - -### NOW() - -Returns the current Unix Timestamp in milliseconds. 
- -``` -"SELECT NOW() AS now_result" returns - { - "now_result": 1587568845765 - } -``` diff --git a/versioned_docs/version-4.7/reference/sql-guide/features-matrix.md b/versioned_docs/version-4.7/reference/sql-guide/features-matrix.md deleted file mode 100644 index 7766faa4..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/features-matrix.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Features Matrix ---- - -# SQL Features Matrix - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## SQL Features Matrix - -Harper provides access to most SQL functions, and we’re always expanding that list. Check below to see if we cover what you need. - -| INSERT | | -| ---------------------------------- | --- | -| Values - multiple values supported | ✔ | -| Sub-SELECT | ✗ | - -| UPDATE | | -| ---------------- | --- | -| SET | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | - -| DELETE | | -| ---------- | --- | -| FROM | ✔ | -| Sub-SELECT | ✗ | -| Conditions | ✔ | - -| SELECT | | -| -------------------- | --- | -| Column SELECT | ✔ | -| Aliases | ✔ | -| Aggregator Functions | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Constant Values | ✔ | -| Distinct | ✔ | -| Sub-SELECT | ✗ | - -| FROM | | -| ---------------- | --- | -| Multi-table JOIN | ✔ | -| INNER JOIN | ✔ | -| LEFT OUTER JOIN | ✔ | -| LEFT INNER JOIN | ✔ | -| RIGHT OUTER JOIN | ✔ | -| RIGHT INNER JOIN | ✔ | -| FULL JOIN | ✔ | -| UNION | ✗ | -| Sub-SELECT | ✗ | -| TOP | ✔ | - -| WHERE | | -| -------------------------- | --- | -| Multi-Conditions | ✔ | -| Wildcards | ✔ | -| IN | ✔ | -| LIKE | ✔ | -| Bit-wise Operators AND, OR | ✔ | -| Bit-wise Operators NOT | ✔ | -| NULL | ✔ | -| BETWEEN | ✔ | -| EXISTS,ANY,ALL | ✔ | -| Compare 
columns | ✔ | -| Compare constants | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | -| Sub-SELECT | ✗ | - -| GROUP BY | | -| --------------------- | --- | -| Multi-Column GROUP BY | ✔ | - -| HAVING | | -| ----------------------------- | --- | -| Aggregate function conditions | ✔ | - -| ORDER BY | | -| --------------------- | --- | -| Multi-Column ORDER BY | ✔ | -| Aliases | ✔ | -| Date Functions\* | ✔ | -| Math Functions | ✔ | diff --git a/versioned_docs/version-4.7/reference/sql-guide/functions.md b/versioned_docs/version-4.7/reference/sql-guide/functions.md deleted file mode 100644 index 789090a4..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/functions.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: Harper SQL Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Functions - -This SQL keywords reference contains the SQL functions available in Harper. - -## Functions - -### Aggregate - -| Keyword | Syntax | Description | -| ------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `AVG` | `AVG(expression)` | Returns the average of a given numeric expression. | -| `COUNT` | `SELECT COUNT(column_name) FROM database.table WHERE condition` | Returns the number records that match the given criteria. Nulls are not counted. | -| `GROUP_CONCAT` | `GROUP_CONCAT(expression)` | Returns a string with concatenated values that are comma separated and that are non-null from a group. Will return null when there are non-null values. 
| -| `MAX` | `SELECT MAX(column_name) FROM database.table WHERE condition` | Returns largest value in a specified column. | -| `MIN` | `SELECT MIN(column_name) FROM database.table WHERE condition` | Returns smallest value in a specified column. | -| `SUM` | `SUM(column_name)` | Returns the sum of the numeric values provided. | -| `ARRAY`\* | `ARRAY(expression)` | Returns a list of data as a field. | -| `DISTINCT_ARRAY`\* | `DISTINCT_ARRAY(expression)` | When placed around a standard `ARRAY()` function, returns a distinct (deduplicated) results set. | - -\*For more information on `ARRAY()` and `DISTINCT_ARRAY()` see [this blog](https://www.harperdb.io/post/sql-queries-to-complex-objects). - -### Conversion - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------- | ---------------------------------------------------------------------- | -| `CAST` | `CAST(expression AS datatype(length))` | Converts a value to a specified datatype. | -| `CONVERT` | `CONVERT(data_type(length), expression, style)` | Converts a value from one datatype to a different, specified datatype. | - -### Date & Time - -| Keyword | Syntax | Description | -| ------------------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CURRENT_DATE` | `CURRENT_DATE()` | Returns the current date in UTC in "YYYY-MM-DD" String format. | -| `CURRENT_TIME` | `CURRENT_TIME()` | Returns the current time in UTC in "HH:mm:ss.SSS" string format. | -| `CURRENT_TIMESTAMP` | `CURRENT_TIMESTAMP` | Referencing this variable will evaluate as the current Unix Timestamp in milliseconds. For more information, go here. 
| -| `DATE` | `DATE([date_string])` | Formats and returns the date string argument in UTC in 'YYYY-MM-DDTHH:mm:ss.SSSZZ' string format. If a date string is not provided, the function will return the current UTC date/time value in the return format defined above. For more information, go here. | -| `DATE_ADD` | `DATE_ADD(date, value, interval)` | Adds the defined amount of time to the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted interval values: Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DATE_DIFF` | `DATE_DIFF(date_1, date_2[, interval])` | Returns the difference between the two date values passed based on the interval as a Number. If an interval is not provided, the function will return the difference value in milliseconds. For more information, go here. | -| `DATE_FORMAT` | `DATE_FORMAT(date, format)` | Formats and returns a date value in the String format provided. Find more details on accepted format values in the moment.js docs. For more information, go here. | -| `DATE_SUB` | `DATE_SUB(date, format)` | Subtracts the defined amount of time from the date provided in UTC and returns the resulting Unix Timestamp in milliseconds. Accepted date_sub interval values- Either string value (key or shorthand) can be passed as the interval argument. For more information, go here. | -| `DAY` | `DAY(date)` | Return the day of the month for the given date. | -| `DAYOFWEEK` | `DAYOFWEEK(date)` | Returns the numeric value of the weekday of the date given("YYYY-MM-DD").NOTE: 0=Sunday, 1=Monday, 2=Tuesday, 3=Wednesday, 4=Thursday, 5=Friday, and 6=Saturday. | -| `EXTRACT` | `EXTRACT(date, date_part)` | Extracts and returns the date_part requested as a String value. Accepted date_part values below show value returned for date = "2020-03-26T15:13:02.041+000" For more information, go here. 
| -| `GETDATE` | `GETDATE()` | Returns the current Unix Timestamp in milliseconds. | -| `GET_SERVER_TIME` | `GET_SERVER_TIME()` | Returns the current date/time value based on the server's timezone in `YYYY-MM-DDTHH:mm:ss.SSSZZ` String format. | -| `OFFSET_UTC` | `OFFSET_UTC(date, offset)` | Returns the UTC date time value with the offset provided included in the return String value formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. The offset argument will be added as minutes unless the value is less than 16 and greater than -16, in which case it will be treated as hours. | -| `NOW` | `NOW()` | Returns the current Unix Timestamp in milliseconds. | -| `HOUR` | `HOUR(datetime)` | Returns the hour part of a given date in range of 0 to 838. | -| `MINUTE` | `MINUTE(datetime)` | Returns the minute part of a time/datetime in range of 0 to 59. | -| `MONTH` | `MONTH(date)` | Returns month part for a specified date in range of 1 to 12. | -| `SECOND` | `SECOND(datetime)` | Returns the seconds part of a time/datetime in range of 0 to 59. | -| `YEAR` | `YEAR(date)` | Returns the year part for a specified date. | - -### Logical - -| Keyword | Syntax | Description | -| -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------ | -| `IF` | `IF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IIF` | `IIF(condition, value_if_true, value_if_false)` | Returns a value if the condition is true, or another value if the condition is false. | -| `IFNULL` | `IFNULL(expression, alt_value)` | Returns a specified value if the expression is null. | -| `NULLIF` | `NULLIF(expression_1, expression_2)` | Returns null if expression_1 is equal to expression_2, if not equal, returns expression_1. 
| - -### Mathematical - -| Keyword | Syntax | Description | -| -------- | ------------------------------- | --------------------------------------------------------------------------------------------------- | -| `ABS` | `ABS(expression)` | Returns the absolute value of a given numeric expression. | -| `CEIL` | `CEIL(number)` | Returns integer ceiling, the smallest integer value that is bigger than or equal to a given number. | -| `EXP` | `EXP(number)` | Returns e to the power of a specified number. | -| `FLOOR` | `FLOOR(number)` | Returns the largest integer value that is smaller than, or equal to, a given number. | -| `RANDOM` | `RANDOM(seed)` | Returns a pseudo random number. | -| `ROUND` | `ROUND(number, decimal_places)` | Rounds a given number to a specified number of decimal places. | -| `SQRT` | `SQRT(expression)` | Returns the square root of an expression. | - -### String - -| Keyword | Syntax | Description | -| ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `CONCAT` | `CONCAT(string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together, resulting in a single string. | -| `CONCAT_WS` | `CONCAT_WS(separator, string_1, string_2, ...., string_n)` | Concatenates, or joins, two or more strings together with a separator, resulting in a single string. | -| `INSTR` | `INSTR(string_1, string_2)` | Returns the first position, as an integer, of string_2 within string_1. | -| `LEN` | `LEN(string)` | Returns the length of a string. | -| `LOWER` | `LOWER(string)` | Converts a string to lower-case. | -| `REGEXP` | `SELECT column_name FROM database.table WHERE column_name REGEXP pattern` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. 
If no matches are found, it returns null. | -| `REGEXP_LIKE` | `SELECT column_name FROM database.table WHERE REGEXP_LIKE(column_name, pattern)` | Searches column for matching string against a given regular expression pattern, provided as a string, and returns all matches. If no matches are found, it returns null. | -| `REPLACE` | `REPLACE(string, old_string, new_string)` | Replaces all instances of old_string within new_string, with string. | -| `SUBSTRING` | `SUBSTRING(string, string_position, length_of_substring)` | Extracts a specified amount of characters from a string. | -| `TRIM` | `TRIM([character(s) FROM] string)` | Removes leading and trailing spaces, or specified character(s), from a string. | -| `UPPER` | `UPPER(string)` | Converts a string to upper-case. | - -## Operators - -### Logical Operators - -| Keyword | Syntax | Description | -| --------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| `BETWEEN` | `SELECT column_name(s) FROM database.table WHERE column_name BETWEEN value_1 AND value_2` | (inclusive) Returns values(numbers, text, or dates) within a given range. | -| `IN` | `SELECT column_name(s) FROM database.table WHERE column_name IN(value(s))` | Used to specify multiple values in a WHERE clause. | -| `LIKE` | `SELECT column_name(s) FROM database.table WHERE column_n LIKE pattern` | Searches for a specified pattern within a WHERE clause. | - -## Queries - -### General - -| Keyword | Syntax | Description | -| ---------- | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| `DISTINCT` | `SELECT DISTINCT column_name(s) FROM database.table` | Returns only unique values, eliminating duplicate records. 
| -| `FROM` | `FROM database.table` | Used to list the database(s), table(s), and any joins required for a SQL statement. | -| `GROUP BY` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) ORDER BY column_name(s)` | Groups rows that have the same values into summary rows. | -| `HAVING` | `SELECT column_name(s) FROM database.table WHERE condition GROUP BY column_name(s) HAVING condition ORDER BY column_name(s)` | Filters data based on a group or aggregate function. | -| `SELECT` | `SELECT column_name(s) FROM database.table` | Selects data from table. | -| `WHERE` | `SELECT column_name(s) FROM database.table WHERE condition` | Extracts records based on a defined condition. | - -### Joins - -| Keyword | Syntax | Description | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `CROSS JOIN` | `SELECT column_name(s) FROM database.table_1 CROSS JOIN database.table_2` | Returns a paired combination of each row from `table_1` with row from `table_2`. Note: CROSS JOIN can return very large result sets and is generally considered bad practice. | -| `FULL OUTER` | `SELECT column_name(s) FROM database.table_1 FULL OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name WHERE condition` | Returns all records when there is a match in either `table_1` (left table) or `table_2` (right table). | -| `[INNER] JOIN` | `SELECT column_name(s) FROM database.table_1 INNER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return only matching records from `table_1` (left table) and `table_2` (right table). The INNER keyword is optional and does not affect the result. 
| -| `LEFT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 LEFT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_1` (left table) and matching data from `table_2` (right table). The OUTER keyword is optional and does not affect the result. | -| `RIGHT [OUTER] JOIN` | `SELECT column_name(s) FROM database.table_1 RIGHT OUTER JOIN database.table_2 ON table_1.column_name = table_2.column_name` | Return all records from `table_2` (right table) and matching data from `table_1` (left table). The OUTER keyword is optional and does not affect the result. | - -### Predicates - -| Keyword | Syntax | Description | -| ------------- | ------------------------------------------------------------------------- | -------------------------- | -| `IS NOT NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NOT NULL` | Tests for non-null values. | -| `IS NULL` | `SELECT column_name(s) FROM database.table WHERE column_name IS NULL` | Tests for null values. | - -### Statements - -| Keyword | Syntax | Description | -| -------- | ---------------------------------------------------------------------------------------- | ----------------------------------- | -| `DELETE` | `DELETE FROM database.table WHERE condition` | Deletes existing data from a table. | -| `INSERT` | `INSERT INTO database.table(column_name(s)) VALUES(value(s))` | Inserts new records into a table. | -| `UPDATE` | `UPDATE database.table SET column_1 = value_1, column_2 = value_2, .... WHERE condition` | Alters existing records in a table. 
| diff --git a/versioned_docs/version-4.7/reference/sql-guide/index.md b/versioned_docs/version-4.7/reference/sql-guide/index.md deleted file mode 100644 index 52f245ab..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/index.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: SQL Guide ---- - -# SQL Guide - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -## Harper SQL Guide - -The purpose of this guide is to describe the available functionality of Harper as it relates to supported SQL functionality. The SQL parser is still actively being developed, many SQL features may not be optimized or utilize indexes. This document will be updated as more features and functionality becomes available. Generally, the REST interface provides a more stable, secure, and performant interface for data interaction, but the SQL functionality can be useful for administrative ad-hoc querying, and utilizing existing SQL statements. **A high-level view of supported features can be found** [**here**](sql-guide/features-matrix)**.** - -Harper adheres to the concept of database & tables. This allows developers to isolate table structures from each other all within one database. - -## Select - -Harper has robust SELECT support, from simple queries all the way to complex joins with multi-conditions, aggregates, grouping & ordering. - -All results are returned as JSON object arrays. 
- -Query for all records and attributes in the dev.dog table: - -``` -SELECT * FROM dev.dog -``` - -Query specific columns from all rows in the dev.dog table: - -``` -SELECT id, dog_name, age FROM dev.dog -``` - -Query for all records and attributes in the dev.dog table ORDERED BY age in ASC order: - -``` -SELECT * FROM dev.dog ORDER BY age -``` - -_The ORDER BY keyword sorts in ascending order by default. To sort in descending order, use the DESC keyword._ - -## Insert - -Harper supports inserting 1 to n records into a table. The primary key must be unique (not used by any other record). If no primary key is provided, it will be assigned an auto-generated UUID. Harper does not support selecting from one table to insert into another at this time. - -``` -INSERT INTO dev.dog (id, dog_name, age, breed_id) - VALUES(1, 'Penny', 5, 347), (2, 'Kato', 4, 347) -``` - -## Update - -Harper supports updating existing table row(s) via UPDATE statements. Multiple conditions can be applied to filter the row(s) to update. At this time selecting from one table to update another is not supported. - -``` -UPDATE dev.dog - SET owner_name = 'Kyle' - WHERE id IN (1, 2) -``` - -## Delete - -Harper supports deleting records from a table with condition support. 
- -``` -DELETE FROM dev.dog - WHERE age < 4 -``` - -## Joins - -Harper allows developers to join any number of tables and currently supports the following join types: - -- INNER JOIN LEFT -- INNER JOIN LEFT -- OUTER JOIN - -Here’s a basic example joining two tables from our Get Started example- joining a dogs table with a breeds table: - -``` -SELECT d.id, d.dog_name, d.owner_name, b.name, b.section - FROM dev.dog AS d - INNER JOIN dev.breed AS b ON d.breed_id = b.id - WHERE d.owner_name IN ('Kyle', 'Zach', 'Stephen') - AND b.section = 'Mutt' - ORDER BY d.dog_name -``` diff --git a/versioned_docs/version-4.7/reference/sql-guide/json-search.md b/versioned_docs/version-4.7/reference/sql-guide/json-search.md deleted file mode 100644 index 1c0c396b..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/json-search.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -title: SQL JSON Search ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL JSON Search - -Harper automatically indexes all top level attributes in a row / object written to a table. However, any attributes which hold JSON data do not have their nested attributes indexed. In order to make searching and/or transforming these JSON documents easy, Harper offers a special SQL function called SEARCH_JSON. The SEARCH_JSON function works in SELECT & WHERE clauses allowing queries to perform powerful filtering on any element of your JSON by implementing the [JSONata library](https://docs.jsonata.org/overview.html) into our SQL engine. - -## Syntax - -`SEARCH_JSON(expression, attribute)` - -Executes the supplied string _expression_ against data of the defined top level _attribute_ for each row. The expression both filters and defines output from the JSON document. 
- -### Example 1 - -#### Search a string array - -Here are two records in the database: - -```json -[ - { - "id": 1, - "name": ["Harper", "Penny"] - }, - { - "id": 2, - "name": ["Penny"] - } -] -``` - -Here is a simple query that gets any record with "Harper" found in the name. - -``` -SELECT * -FROM dev.dog -WHERE search_json('"Harper" in *', name) -``` - -### Example 2 - -The purpose of this query is to give us every movie where at least two of our favorite actors from Marvel films have acted together. The results will return the movie title, the overview, release date and an object array of the actor’s name and their character name in the movie. - -Both function calls evaluate the credits.cast attribute, this attribute is an object array of every cast member in a movie. - -``` -SELECT m.title, - m.overview, - m.release_date, - SEARCH_JSON($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"]].{"actor": name, "character": character}, c.`cast`) AS characters -FROM movies.credits c - INNER JOIN movies.movie m - ON c.movie_id = m.id -WHERE SEARCH_JSON($count($[name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. 
Jackson", "Gwyneth Paltrow", "Don Cheadle"]]), c.`cast`) >= 2 -``` - -A sample of this data from the movie The Avengers looks like - -```json -[ - { - "cast_id": 46, - "character": "Tony Stark / Iron Man", - "credit_id": "52fe4495c3a368484e02b251", - "gender": "male", - "id": 3223, - "name": "Robert Downey Jr.", - "order": 0 - }, - { - "cast_id": 2, - "character": "Steve Rogers / Captain America", - "credit_id": "52fe4495c3a368484e02b19b", - "gender": "male", - "id": 16828, - "name": "Chris Evans", - "order": 1 - }, - { - "cast_id": 307, - "character": "Bruce Banner / The Hulk", - "credit_id": "5e85e8083344c60015411cfa", - "gender": "male", - "id": 103, - "name": "Mark Ruffalo", - "order": 2 - } -] -``` - -Let’s break down the SEARCH_JSON function call in the SELECT: - -``` -SEARCH_JSON( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]].{ - "actor": name, - "character": character - }, - c.`cast` -) -``` - -The first argument passed to SEARCH_JSON is the expression to execute against the second argument which is the cast attribute on the credits table. This expression will execute for every row. Looking into the expression it starts with "$[…]" this tells the expression to iterate all elements of the cast array. - -Then the expression tells the function to only return entries where the name attribute matches any of the actors defined in the array: - -``` -name in ["Robert Downey Jr.", "Chris Evans", "Scarlett Johansson", "Mark Ruffalo", "Chris Hemsworth", "Jeremy Renner", "Clark Gregg", "Samuel L. Jackson", "Gwyneth Paltrow", "Don Cheadle"] -``` - -So far, we’ve iterated the array and filtered out rows, but we also want the results formatted in a specific way, so we’ve chained an expression on our filter with: `{"actor": name, "character": character}`. 
This tells the function to create a specific object for each matching entry. - -**Sample Result** - -```json -[ - { - "actor": "Robert Downey Jr.", - "character": "Tony Stark / Iron Man" - }, - { - "actor": "Chris Evans", - "character": "Steve Rogers / Captain America" - }, - { - "actor": "Mark Ruffalo", - "character": "Bruce Banner / The Hulk" - } -] -``` - -Just having the SEARCH_JSON function in our SELECT is powerful, but given our criteria it would still return every other movie that doesn’t have our matching actors, in order to filter out the movies we do not want we also use SEARCH_JSON in the WHERE clause. - -This function call in the WHERE clause is similar, but we don’t need to perform the same transformation as occurred in the SELECT: - -``` -SEARCH_JSON( - $count( - $[name in [ - "Robert Downey Jr.", - "Chris Evans", - "Scarlett Johansson", - "Mark Ruffalo", - "Chris Hemsworth", - "Jeremy Renner", - "Clark Gregg", - "Samuel L. Jackson", - "Gwyneth Paltrow", - "Don Cheadle" - ]] - ), - c.`cast` -) >= 2 -``` - -As seen above we execute the same name filter against the cast array, the primary difference is we are wrapping the filtered results in $count(…). As it looks this returns a count of the results back which we then use against our SQL comparator of >= 2. - -To see further SEARCH_JSON examples in action view our Postman Collection that provides a [sample database & data with query examples](../../developers/operations-api/advanced-json-sql-examples). 
- -To learn more about how to build expressions check out the JSONata documentation: [https://docs.jsonata.org/overview](https://docs.jsonata.org/overview) diff --git a/versioned_docs/version-4.7/reference/sql-guide/reserved-word.md b/versioned_docs/version-4.7/reference/sql-guide/reserved-word.md deleted file mode 100644 index 2cd812ba..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/reserved-word.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: Harper SQL Reserved Words ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# Harper SQL Reserved Words - -This is a list of reserved words in the SQL Parser. Use of these words or symbols may result in unexpected behavior or inaccessible tables/attributes. If any of these words must be used, any SQL call referencing a database, table, or attribute must have backticks (`…`) or brackets ([…]) around the variable. 
- -For Example, for a table called `ASSERT` in the `data` database, a SQL select on that table would look like: - -``` -SELECT * from data.`ASSERT` -``` - -Alternatively: - -``` -SELECT * from data.[ASSERT] -``` - -### RESERVED WORD LIST - -- ABSOLUTE -- ACTION -- ADD -- AGGR -- ALL -- ALTER -- AND -- ANTI -- ANY -- APPLY -- ARRAY -- AS -- ASSERT -- ASC -- ATTACH -- AUTOINCREMENT -- AUTO_INCREMENT -- AVG -- BEGIN -- BETWEEN -- BREAK -- BY -- CALL -- CASE -- CAST -- CHECK -- CLASS -- CLOSE -- COLLATE -- COLUMN -- COLUMNS -- COMMIT -- CONSTRAINT -- CONTENT -- CONTINUE -- CONVERT -- CORRESPONDING -- COUNT -- CREATE -- CROSS -- CUBE -- CURRENT_TIMESTAMP -- CURSOR -- DATABASE -- DECLARE -- DEFAULT -- DELETE -- DELETED -- DESC -- DETACH -- DISTINCT -- DOUBLEPRECISION -- DROP -- ECHO -- EDGE -- END -- ENUM -- ELSE -- EXCEPT -- EXISTS -- EXPLAIN -- FALSE -- FETCH -- FIRST -- FOREIGN -- FROM -- GO -- GRAPH -- GROUP -- GROUPING -- HAVING -- HDB_HASH -- HELP -- IF -- IDENTITY -- IS -- IN -- INDEX -- INNER -- INSERT -- INSERTED -- INTERSECT -- INTO -- JOIN -- KEY -- LAST -- LET -- LEFT -- LIKE -- LIMIT -- LOOP -- MATCHED -- MATRIX -- MAX -- MERGE -- MIN -- MINUS -- MODIFY -- NATURAL -- NEXT -- NEW -- NOCASE -- NO -- NOT -- NULL -- OFF -- ON -- ONLY -- OFFSET -- OPEN -- OPTION -- OR -- ORDER -- OUTER -- OVER -- PATH -- PARTITION -- PERCENT -- PLAN -- PRIMARY -- PRINT -- PRIOR -- QUERY -- READ -- RECORDSET -- REDUCE -- REFERENCES -- RELATIVE -- REPLACE -- REMOVE -- RENAME -- REQUIRE -- RESTORE -- RETURN -- RETURNS -- RIGHT -- ROLLBACK -- ROLLUP -- ROW -- SCHEMA -- SCHEMAS -- SEARCH -- SELECT -- SEMI -- SET -- SETS -- SHOW -- SOME -- SOURCE -- STRATEGY -- STORE -- SYSTEM -- SUM -- TABLE -- TABLES -- TARGET -- TEMP -- TEMPORARY -- TEXTSTRING -- THEN -- TIMEOUT -- TO -- TOP -- TRAN -- TRANSACTION -- TRIGGER -- TRUE -- TRUNCATE -- UNION -- UNIQUE -- UPDATE -- USE -- USING -- VALUE -- VERTEX -- VIEW -- WHEN -- WHERE -- WHILE -- WITH -- WORK diff --git 
a/versioned_docs/version-4.7/reference/sql-guide/sql-geospatial-functions.md b/versioned_docs/version-4.7/reference/sql-guide/sql-geospatial-functions.md deleted file mode 100644 index f0c571da..00000000 --- a/versioned_docs/version-4.7/reference/sql-guide/sql-geospatial-functions.md +++ /dev/null @@ -1,419 +0,0 @@ ---- -title: SQL Geospatial Functions ---- - -:::warning -Harper encourages developers to utilize other querying tools over SQL for performance purposes. Harper SQL is intended for data investigation purposes and uses cases where performance is not a priority. SQL optimizations are on our roadmap for the future. -::: - -# SQL Geospatial Functions - -Harper geospatial features require data to be stored in a single column using the [GeoJSON standard](https://geojson.org/), a standard commonly used in geospatial technologies. Geospatial functions are available to be used in SQL statements. - -If you are new to GeoJSON you should check out the full specification here: https://geojson.org/. There are a few important things to point out before getting started. - -1. All GeoJSON coordinates are stored in `[longitude, latitude]` format. -1. Coordinates or GeoJSON geometries must be passed as string when written directly in a SQL statement. -1. Note if you are using Postman for you testing. Due to limitations in the Postman client, you will need to escape quotes in your strings and your SQL will need to be passed on a single line. - -In the examples contained in the left-hand navigation, database and table names may change, but all GeoJSON data will be stored in a column named geo_data. - -# geoArea - -The geoArea() function returns the area of one or more features in square meters. - -### Syntax - -geoArea(_geoJSON_) - -### Parameters - -| Parameter | Description | -| --------- | ------------------------------- | -| geoJSON | Required. One or more features. | - -#### Example 1 - -Calculate the area, in square meters, of a manually passed GeoJSON polygon. 
- -``` -SELECT geoArea('{ - "type":"Feature", - "geometry":{ - "type":"Polygon", - "coordinates":[[ - [0,0], - [0.123456,0], - [0.123456,0.123456], - [0,0.123456] - ]] - } -}') -``` - -#### Example 2 - -Find all records that have an area less than 1 square mile (or 2589988 square meters). - -``` -SELECT * FROM dev.locations -WHERE geoArea(geo_data) < 2589988 -``` - -# geoLength - -Takes a GeoJSON and measures its length in the specified units (default is kilometers). - -## Syntax - -geoLength(_geoJSON_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| geoJSON | Required. GeoJSON to measure. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the length, in kilometers, of a manually passed GeoJSON linestring. - -``` -SELECT geoLength('{ - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.97963309288025,39.76163265441438], - [-104.9823260307312,39.76365323407955], - [-104.99193906784058,39.75616442110704] - ] - } -}') -``` - -### Example 2 - -Find all data plus the calculated length in miles of the GeoJSON, restrict the response to only lengths less than 5 miles, and return the data in order of lengths smallest to largest. - -``` -SELECT *, geoLength(geo_data, 'miles') as length -FROM dev.locations -WHERE geoLength(geo_data, 'miles') < 5 -ORDER BY length ASC -``` - -# geoDifference - -Returns a new polygon with the difference of the second polygon clipped from the first polygon. - -## Syntax - -geoDifference(_polygon1, polygon2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------------------------------------------- | -| polygon1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| polygon2 | Required. 
Polygon or MultiPolygon GeoJSON feature to remove from polygon1. | - -### Example - -Return a GeoJSON Polygon that removes City Park (_polygon2_) from Colorado (_polygon1_). - -``` -SELECT geoDifference('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267215955], - [-102.01904296874999,37.00255267215955], - [-102.01904296874999,41.0130657870063], - [-109.072265625,41.0130657870063], - [-109.072265625,37.00255267215955] - ]] - } - }', - '{ - "type": "Feature", - "properties": { - "name":"City Park" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.95973110198975,39.7543828214657], - [-104.95955944061278,39.744781185675386], - [-104.95904445648193,39.74422022399989], - [-104.95835781097412,39.74402223643582], - [-104.94097709655762,39.74392324244047], - [-104.9408483505249,39.75434982844515], - [-104.95973110198975,39.7543828214657] - ]] - } - }' -) -``` - -# geoDistance - -Calculates the distance between two points in units (default is kilometers). - -## Syntax - -geoDistance(_point1, point2_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Calculate the distance, in miles, between Harper’s headquarters and the Washington Monument. - -``` -SELECT geoDistance('[-104.979127,39.761563]', '[-77.035248,38.889475]', 'miles') -``` - -### Example 2 - -Find all locations that are within 40 kilometers of a given point, return that distance in miles, and sort by distance in an ascending order. 
- -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoDistance('[-104.979127,39.761563]', geo_data, 'kilometers') < 40 -ORDER BY distance ASC -``` - -# geoNear - -Determines if point1 and point2 are within a specified distance from each other, default units are kilometers. Returns a Boolean. - -## Syntax - -geoNear(_point1, point2, distance_[_, units_]) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------------------------------------------- | -| point1 | Required. GeoJSON Point specifying the origin. | -| point2 | Required. GeoJSON Point specifying the destination. | -| distance | Required. The maximum distance in units as an integer or decimal. | -| units | Optional. Specified as a string. Options are ‘degrees’, ‘radians’, ‘miles’, or ‘kilometers’. Default is ‘kilometers’. | - -### Example 1 - -Return all locations within 50 miles of a given point. - -``` -SELECT * -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 50, 'miles') -``` - -### Example 2 - -Return all locations within 2 degrees of the earth of a given point. (Each degree lat/long is about 69 miles [111 kilometers]). Return all data and the distance in miles, sorted by ascending distance. - -``` -SELECT *, geoDistance('[-104.979127,39.761563]', geo_data, 'miles') as distance -FROM dev.locations -WHERE geoNear('[-104.979127,39.761563]', geo_data, 2, 'degrees') -ORDER BY distance ASC -``` - -# geoContains - -Determines if geo2 is completely contained by geo1. Returns a Boolean. - -## Syntax - -geoContains(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | --------------------------------------------------------------------------------- | -| geo1 | Required. Polygon or MultiPolygon GeoJSON feature. | -| geo2 | Required. Polygon or MultiPolygon GeoJSON feature tested to be contained by geo1. 
| - -### Example 1 - -Return all locations within the state of Colorado (passed as a GeoJSON string). - -``` -SELECT * -FROM dev.locations -WHERE geoContains('{ - "type": "Feature", - "properties": { - "name":"Colorado" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-109.072265625,37.00255267], - [-102.01904296874999,37.00255267], - [-102.01904296874999,41.01306579], - [-109.072265625,41.01306579], - [-109.072265625,37.00255267] - ]] - } -}', geo_data) -``` - -### Example 2 - -Return all locations which contain Harper Headquarters. - -``` -SELECT * -FROM dev.locations -WHERE geoContains(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoEqual - -Determines if two GeoJSON features are the same type and have identical X,Y coordinate values. For more information see https://developers.arcgis.com/documentation/spatial-references/. Returns a Boolean. - -## Syntax - -geoEqual(_geo1_, _geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find Harper Headquarters within all locations within the database. 
- -``` -SELECT * -FROM dev.locations -WHERE geoEqual(geo_data, '{ - "type": "Feature", - "properties": { - "name": "Harper Headquarters" - }, - "geometry": { - "type": "Polygon", - "coordinates": [[ - [-104.98060941696167,39.760704817357905], - [-104.98053967952728,39.76065120861263], - [-104.98055577278137,39.760642961109674], - [-104.98037070035934,39.76049450588716], - [-104.9802714586258,39.76056254790385], - [-104.9805235862732,39.76076461167841], - [-104.98060941696167,39.760704817357905] - ]] - } -}') -``` - -# geoCrosses - -Determines if the geometries cross over each other. Returns boolean. - -## Syntax - -geoCrosses(_geo1, geo2_) - -## Parameters - -| Parameter | Description | -| --------- | -------------------------------------- | -| geo1 | Required. GeoJSON geometry or feature. | -| geo2 | Required. GeoJSON geometry or feature. | - -### Example - -Find all locations that cross over a highway. - -``` -SELECT * -FROM dev.locations -WHERE geoCrosses( - geo_data, - '{ - "type": "Feature", - "properties": { - "name": "Highway I-25" - }, - "geometry": { - "type": "LineString", - "coordinates": [ - [-104.9139404296875,41.00477542222947], - [-105.0238037109375,39.715638134796336], - [-104.853515625,39.53370327008705], - [-104.853515625,38.81403111409755], - [-104.61181640625,38.39764411353178], - [-104.8974609375,37.68382032669382], - [-104.501953125,37.00255267215955] - ] - } - }' -) -``` - -# geoConvert - -Converts a series of coordinates into a GeoJSON of the specified type. - -## Syntax - -geoConvert(_coordinates, geo_type_[, _properties_]) - -## Parameters - -| Parameter | Description | -| ----------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| coordinates | Required. One or more coordinates | -| geo_type | Required. GeoJSON geometry type. Options are ‘point’, ‘lineString’, ‘multiLineString’, ‘multiPoint’, ‘multiPolygon’, and ‘polygon’ | -| properties | Optional. 
Escaped JSON array with properties to be added to the GeoJSON output. | - -### Example - -Convert a given coordinate into a GeoJSON point with specified properties. - -``` -SELECT geoConvert( - '[-104.979127,39.761563]', - 'point', - '{ - "name": "Harper Headquarters" - }' -) -``` diff --git a/versioned_docs/version-4.7/reference/storage-algorithm.md b/versioned_docs/version-4.7/reference/storage-algorithm.md deleted file mode 100644 index 03c4c014..00000000 --- a/versioned_docs/version-4.7/reference/storage-algorithm.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Storage Algorithm ---- - -# Storage Algorithm - -The Harper storage algorithm is fundamental to the Harper core functionality, enabling the [Dynamic Schema](dynamic-schema) and all other user-facing functionality. Harper is built on top of Lightning Memory-Mapped Database (LMDB), a key-value store offering industry leading performance and functionality, which allows for our storage algorithm to store data in tables as rows/objects. This document will provide additional details on how data is stored within Harper. - -## Query Language Agnostic - -The Harper storage algorithm was designed to abstract the data storage from any individual query language. Harper currently supports both SQL and NoSQL on top of this storage algorithm, with the ability to add additional query languages in the future. This means data can be inserted via NoSQL and read via SQL while hitting the same underlying data storage. - -## ACID Compliant - -Utilizing Multi-Version Concurrency Control (MVCC) through LMDB, Harper offers ACID compliance independently on each node. Readers and writers operate independently of each other, meaning readers don’t block writers and writers don’t block readers. Each Harper table has a single writer process, avoiding deadlocks and assuring that writes are executed in the order in which they were received. 
Harper tables can have multiple reader processes operating at the same time for consistent, high scale reads. - -## Universally Indexed - -All top level attributes are automatically indexed immediately upon ingestion. The [Harper Dynamic Schema](dynamic-schema) reflexively creates both the attribute and index reflexively as new schema metadata comes in. Indexes are agnostic of datatype, honoring the following order: booleans, numbers ordered naturally, strings ordered lexically. Within the LMDB implementation, table records are grouped together into a single LMDB environment file, where each attribute index is a sub-database (dbi) inside said environment file. An example of the indexing scheme can be seen below. - -## Additional LMDB Benefits - -Harper inherits both functional and performance benefits by implementing LMDB as the underlying key-value store. Data is memory-mapped, which enables quick data access without data duplication. All writers are fully serialized, making writes deadlock-free. LMDB is built to maximize operating system features and functionality, fully exploiting buffer cache and built to run in CPU cache. To learn more about LMDB, visit their documentation. - -## Harper Indexing Example (Single Table) - -![](/img/v4.6/reference/HarperDB-3.0-Storage-Algorithm.png.webp) diff --git a/versioned_docs/version-4.7/reference/transactions.md b/versioned_docs/version-4.7/reference/transactions.md deleted file mode 100644 index 7e8546fb..00000000 --- a/versioned_docs/version-4.7/reference/transactions.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: Transactions ---- - -# Transactions - -Transactions are an important part of robust handling of data in data-driven applications. Harper provides ACID-compliant support for transactions, allowing for guaranteed atomic, consistent, and isolated data handling within transactions, with durability guarantees on commit. 
Understanding how transactions are tracked and behave is important for properly leveraging transactional support in Harper. For most operations this is very intuitive, each HTTP request is executed in a transaction, so when multiple actions are executed in a single request, they are normally automatically included in the same transaction. - -Transactions span a database. Once a read snapshot is started, it is an atomic snapshot of all the tables in a database. And writes that span multiple tables in the database will all be committed atomically together (no writes in one table will be visible before writes in another table in the same database). If a transaction is used to access or write data in multiple databases, there will actually be a separate database transaction used for each database, and there is no guarantee of atomicity between separate transactions in separate databases. This can be an important consideration when deciding if and how tables should be organized into different databases. - -Because Harper is designed to be a low-latency distributed database, locks are avoided in data handling. Because of this, transactions do not lock data within the transaction. When a transaction starts, it will provide a read snapshot of the database for any retrievals or queries, which means all reads will be performed on a single version of the database isolated from any other writes that are concurrently taking place. And within a transaction all writes are aggregated and atomically written on commit. These writes are all isolated (from other transactions) until committed, and all become visible atomically. However, because transactions are non-locking, it is possible that writes from other transactions may occur between when reads are performed and when the writes are committed (at which point the last write will win for any records that have been written concurrently). Support for locks in transactions is planned for a future release. 
- -Transactions can also be explicitly started using the `transaction` global function that is provided in the Harper environment: - -## `transaction(context?, callback: (transaction) => any): Promise` - -This executes the callback in a transaction, providing a context that can be used for any resource methods that are called. This returns a promise for when the transaction has been committed. The callback itself may be asynchronous (return a promise), allowing for asynchronous activity within the transaction. This is useful for starting a transaction when your code is not already running within a transaction (in an HTTP request handler, a transaction will typically already be started). For example, if we wanted to run an action on a timer that periodically loads data, we could ensure that the data is loaded in single transactions like this (note that HDB is multi-threaded and if we do a timer-based job, we very likely want it to only run in one thread): - -```javascript -import { tables } from 'harperdb'; -const { MyTable } = tables; -if (isMainThread) // only on main thread - setInterval(async () => { - let someData = await (await fetch(... some URL ...)).json(); - transaction((txn) => { - for (let item in someData) { - MyTable.put(item, txn); - } - }); - }, 3600000); // every hour -``` - -You can provide your own context object for the transaction to attach to. If you call `transaction` with a context that already has a transaction started, it will simply use the current transaction, execute the callback and immediately return (this can be useful for ensuring that a transaction has started). - -Once the transaction callback is completed (for non-nested transaction calls), the transaction will commit, and if the callback throws an error, the transaction will abort. However, the callback is called with the `transaction` object, which also provides the following methods and property: - -- `commit(): Promise` - Commits the current transaction. 
The transaction will be committed once the returned promise resolves. -- `abort(): void` - Aborts the current transaction and resets it. -- `resetReadSnapshot(): void` - Resets the read snapshot for the transaction, resetting to the latest data in the database. -- `timestamp: number` - This is the timestamp associated with the current transaction. From 0ebea43acfc82215ff5d44d14ee8d40922bf4f63 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 16:12:36 -0600 Subject: [PATCH 37/51] copy new content to reference/ --- reference/analytics/operations.md | 136 +++ reference/analytics/overview.md | 207 ++++ reference/cli/authentication.md | 237 +++++ reference/cli/commands.md | 269 +++++ reference/cli/operations-api-commands.md | 380 +++++++ reference/cli/overview.md | 200 ++++ reference/components/applications.md | 336 ++++++ reference/components/extension-api.md | 186 ++++ .../components/javascript-environment.md | 83 ++ reference/components/overview.md | 170 +++ reference/components/plugin-api.md | 423 ++++++++ reference/configuration/operations.md | 141 +++ reference/configuration/options.md | 319 ++++++ reference/configuration/overview.md | 209 ++++ reference/database/api.md | 243 +++++ reference/database/compaction.md | 71 ++ reference/database/data-loader.md | 216 ++++ reference/database/jobs.md | 272 +++++ reference/database/overview.md | 123 +++ reference/database/schema.md | 503 +++++++++ reference/database/sql.md | 345 ++++++ reference/database/storage-algorithm.md | 111 ++ reference/database/system-tables.md | 158 +++ reference/database/transaction.md | 154 +++ reference/environment-variables/overview.md | 77 ++ reference/fastify-routes/overview.md | 127 +++ reference/graphql-querying/overview.md | 248 +++++ reference/http/api.md | 401 +++++++ reference/http/configuration.md | 342 ++++++ reference/http/overview.md | 64 ++ reference/http/tls.md | 119 +++ reference/legacy/cloud.md | 11 + reference/legacy/custom-functions.md | 13 + 
reference/logging/api.md | 153 +++ reference/logging/configuration.md | 370 +++++++ reference/logging/operations.md | 91 ++ reference/logging/overview.md | 92 ++ reference/mqtt/configuration.md | 231 ++++ reference/mqtt/overview.md | 142 +++ reference/operations-api/operations.md | 990 ++++++++++++++++++ reference/operations-api/overview.md | 87 ++ reference/replication/clustering.md | 318 ++++++ reference/replication/overview.md | 309 ++++++ reference/replication/sharding.md | 209 ++++ reference/resources/overview.md | 130 +++ reference/resources/query-optimization.md | 219 ++++ reference/resources/resource-api.md | 635 +++++++++++ reference/rest/content-types.md | 100 ++ reference/rest/headers.md | 97 ++ reference/rest/overview.md | 159 +++ reference/rest/querying.md | 261 +++++ reference/rest/server-sent-events.md | 64 ++ reference/rest/websockets.md | 106 ++ reference/security/api.md | 23 + reference/security/basic-authentication.md | 58 + reference/security/certificate-management.md | 152 +++ .../security/certificate-verification.md | 449 ++++++++ reference/security/configuration.md | 69 ++ reference/security/jwt-authentication.md | 118 +++ reference/security/mtls-authentication.md | 80 ++ reference/security/overview.md | 55 + reference/static-files/overview.md | 174 +++ reference/studio/overview.md | 37 + reference/users-and-roles/configuration.md | 67 ++ reference/users-and-roles/operations.md | 176 ++++ reference/users-and-roles/overview.md | 253 +++++ 66 files changed, 13368 insertions(+) create mode 100644 reference/analytics/operations.md create mode 100644 reference/analytics/overview.md create mode 100644 reference/cli/authentication.md create mode 100644 reference/cli/commands.md create mode 100644 reference/cli/operations-api-commands.md create mode 100644 reference/cli/overview.md create mode 100644 reference/components/applications.md create mode 100644 reference/components/extension-api.md create mode 100644 
reference/components/javascript-environment.md create mode 100644 reference/components/overview.md create mode 100644 reference/components/plugin-api.md create mode 100644 reference/configuration/operations.md create mode 100644 reference/configuration/options.md create mode 100644 reference/configuration/overview.md create mode 100644 reference/database/api.md create mode 100644 reference/database/compaction.md create mode 100644 reference/database/data-loader.md create mode 100644 reference/database/jobs.md create mode 100644 reference/database/overview.md create mode 100644 reference/database/schema.md create mode 100644 reference/database/sql.md create mode 100644 reference/database/storage-algorithm.md create mode 100644 reference/database/system-tables.md create mode 100644 reference/database/transaction.md create mode 100644 reference/environment-variables/overview.md create mode 100644 reference/fastify-routes/overview.md create mode 100644 reference/graphql-querying/overview.md create mode 100644 reference/http/api.md create mode 100644 reference/http/configuration.md create mode 100644 reference/http/overview.md create mode 100644 reference/http/tls.md create mode 100644 reference/legacy/cloud.md create mode 100644 reference/legacy/custom-functions.md create mode 100644 reference/logging/api.md create mode 100644 reference/logging/configuration.md create mode 100644 reference/logging/operations.md create mode 100644 reference/logging/overview.md create mode 100644 reference/mqtt/configuration.md create mode 100644 reference/mqtt/overview.md create mode 100644 reference/operations-api/operations.md create mode 100644 reference/operations-api/overview.md create mode 100644 reference/replication/clustering.md create mode 100644 reference/replication/overview.md create mode 100644 reference/replication/sharding.md create mode 100644 reference/resources/overview.md create mode 100644 reference/resources/query-optimization.md create mode 100644 
reference/resources/resource-api.md create mode 100644 reference/rest/content-types.md create mode 100644 reference/rest/headers.md create mode 100644 reference/rest/overview.md create mode 100644 reference/rest/querying.md create mode 100644 reference/rest/server-sent-events.md create mode 100644 reference/rest/websockets.md create mode 100644 reference/security/api.md create mode 100644 reference/security/basic-authentication.md create mode 100644 reference/security/certificate-management.md create mode 100644 reference/security/certificate-verification.md create mode 100644 reference/security/configuration.md create mode 100644 reference/security/jwt-authentication.md create mode 100644 reference/security/mtls-authentication.md create mode 100644 reference/security/overview.md create mode 100644 reference/static-files/overview.md create mode 100644 reference/studio/overview.md create mode 100644 reference/users-and-roles/configuration.md create mode 100644 reference/users-and-roles/operations.md create mode 100644 reference/users-and-roles/overview.md diff --git a/reference/analytics/operations.md b/reference/analytics/operations.md new file mode 100644 index 00000000..e82d3086 --- /dev/null +++ b/reference/analytics/operations.md @@ -0,0 +1,136 @@ +--- +id: operations +title: Analytics Operations +--- + + + + + +Operations for querying Harper analytics data. All operations require `superuser` permission. + +Analytics data can also be queried directly via `search_by_conditions` on the `hdb_raw_analytics` and `hdb_analytics` tables in the `system` database — see [Analytics Overview](./overview) for details on the table structure. + +--- + +## `list_metrics` + +Returns the list of available metric names that can be queried with `get_analytics`. 
+ +### Parameters + +| Parameter | Required | Type | Description | +| -------------- | -------- | -------- | ------------------------------------------------------------------------ | +| `operation` | Yes | string | Must be `"list_metrics"` | +| `metric_types` | No | string[] | Filter by type: `"builtin"`, `"custom"`, or both. Default: `["builtin"]` | + +### Request + +```json +{ + "operation": "list_metrics", + "metric_types": ["custom", "builtin"] +} +``` + +### Response + +```json +["resource-usage", "table-size", "database-size", "main-thread-utilization", "utilization", "storage-volume"] +``` + +--- + +## `describe_metric` + +Returns the structure and available attributes for a specific metric. + +### Parameters + +| Parameter | Required | Type | Description | +| ----------- | -------- | ------ | ------------------------------ | +| `operation` | Yes | string | Must be `"describe_metric"` | +| `metric` | Yes | string | Name of the metric to describe | + +### Request + +```json +{ + "operation": "describe_metric", + "metric": "resource-usage" +} +``` + +### Response + +```json +{ + "attributes": [ + { "name": "id", "type": "number" }, + { "name": "metric", "type": "string" }, + { "name": "userCPUTime", "type": "number" }, + { "name": "systemCPUTime", "type": "number" }, + { "name": "node", "type": "string" } + ] +} +``` + +--- + +## `get_analytics` + +Queries analytics data for a specific metric over a time range. 
+ +### Parameters + +| Parameter | Required | Type | Description | +| ---------------- | -------- | -------- | ----------------------------------------------------------------------------------------------------------------------- | +| `operation` | Yes | string | Must be `"get_analytics"` | +| `metric` | Yes | string | Metric name — use `list_metrics` to get valid values | +| `start_time` | No | number | Start of time range as Unix timestamp in milliseconds | +| `end_time` | No | number | End of time range as Unix timestamp in milliseconds | +| `get_attributes` | No | string[] | Attributes to include in each result. If omitted, all attributes are returned | +| `conditions` | No | object[] | Additional filter conditions. Same format as [`search_by_conditions`](../operations-api/operations.md#nosql-operations) | + +### Request + +```json +{ + "operation": "get_analytics", + "metric": "resource-usage", + "start_time": 1769198332754, + "end_time": 1769198532754, + "get_attributes": ["id", "metric", "userCPUTime", "systemCPUTime"], + "conditions": [ + { + "attribute": "node", + "operator": "equals", + "value": "node1.example.com" + } + ] +} +``` + +### Response + +```json +[ + { + "id": "12345", + "metric": "resource-usage", + "userCPUTime": 100, + "systemCPUTime": 50 + }, + { + "id": "67890", + "metric": "resource-usage", + "userCPUTime": 150, + "systemCPUTime": 75 + } +] +``` + +## Related + +- [Analytics Overview](./overview) +- [Operations API Overview](../operations-api/overview.md) diff --git a/reference/analytics/overview.md b/reference/analytics/overview.md new file mode 100644 index 00000000..679e70f4 --- /dev/null +++ b/reference/analytics/overview.md @@ -0,0 +1,207 @@ +--- +id: overview +title: Analytics +--- + + + + + +Added in: v4.5.0 (resource and storage analytics) + +Harper collects real-time telemetry and statistics across all operations, URL endpoints, and messaging topics. 
This data can be used to monitor server health, understand traffic and usage patterns, identify resource-intensive queries, and inform scaling decisions. + +## Storage Tables + +Analytics data is stored in two system tables in the `system` database: + +| Table | Description | +| ------------------- | ------------------------------------------------------------------------------------------- | +| `hdb_raw_analytics` | Per-second raw entries recorded by each thread. One record per second per active thread. | +| `hdb_analytics` | Aggregate entries recorded once per minute, summarizing all per-second data across threads. | + +Both tables require `superuser` permission to query. + +## Raw Analytics (`hdb_raw_analytics`) + +Raw entries are recorded once per second (when there is activity) by each thread. Each record captures all activity in the last second along with system resource information. Records use the timestamp in milliseconds since epoch as the primary key. + +Query raw analytics using `search_by_conditions` on the `hdb_raw_analytics` table. 
The example below fetches 10 seconds of raw entries: + +```http +POST http://localhost:9925 +Content-Type: application/json + +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_raw_analytics", + "conditions": [{ + "search_attribute": "id", + "search_type": "between", + "search_value": [1688594000000, 1688594010000] + }] +} +``` + +Example raw entry: + +```json +{ + "time": 1688594390708, + "period": 1000.8336279988289, + "metrics": [ + { + "metric": "bytes-sent", + "path": "search_by_conditions", + "type": "operation", + "median": 202, + "mean": 202, + "p95": 202, + "p90": 202, + "count": 1 + }, + { + "metric": "memory", + "threadId": 2, + "rss": 1492664320, + "heapTotal": 124596224, + "heapUsed": 119563120, + "external": 3469790, + "arrayBuffers": 798721 + }, + { + "metric": "utilization", + "idle": 138227.52767700003, + "active": 70.5066209952347, + "utilization": 0.0005098165086230495 + } + ], + "threadId": 2, + "totalBytesProcessed": 12182820, + "id": 1688594390708.6853 +} +``` + +## Aggregate Analytics (`hdb_analytics`) + +Aggregate entries are recorded once per minute, combining per-second raw entries from all threads into a single summary record. Use `search_by_conditions` on the `hdb_analytics` table with a broader time range: + +```http +POST http://localhost:9925 +Content-Type: application/json + +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_analytics", + "conditions": [{ + "search_attribute": "id", + "search_type": "between", + "search_value": [1688194100000, 1688594990000] + }] +} +``` + +Example aggregate entry: + +```json +{ + "period": 60000, + "metric": "bytes-sent", + "method": "connack", + "type": "mqtt", + "median": 4, + "mean": 4, + "p95": 4, + "p90": 4, + "count": 1, + "id": 1688589569646, + "time": 1688589569646 +} +``` + +## Standard Metrics + +Harper automatically tracks the following metrics for all services. 
Applications can also define custom metrics via [`server.recordAnalytics()`](../http/api.md#serverrecordanalyticsvalue-metric-path-method-type). + +### HTTP Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ------------------ | ------------- | -------------- | --------------------------- | ----- | ---------------------------------------- | +| `duration` | resource path | request method | `cache-hit` or `cache-miss` | ms | Duration of request handler | +| `duration` | route path | request method | `fastify-route` | ms | Duration of Fastify route handler | +| `duration` | operation | | `operation` | ms | Duration of Operations API operation | +| `success` | resource path | request method | | % | Percentage of successful requests | +| `success` | route path | request method | `fastify-route` | % | | +| `success` | operation | | `operation` | % | | +| `bytes-sent` | resource path | request method | | bytes | Response bytes sent | +| `bytes-sent` | route path | request method | `fastify-route` | bytes | | +| `bytes-sent` | operation | | `operation` | bytes | | +| `transfer` | resource path | request method | `operation` | ms | Duration of response transfer | +| `transfer` | route path | request method | `fastify-route` | ms | | +| `transfer` | operation | | `operation` | ms | | +| `socket-routed` | | | | % | Percentage of sockets immediately routed | +| `tls-handshake` | | | | ms | TLS handshake duration | +| `tls-reused` | | | | % | Percentage of TLS sessions reused | +| `cache-hit` | table name | | | % | Percentage of cache hits | +| `cache-resolution` | table name | | | ms | Duration of resolving uncached entries | + +### MQTT / WebSocket Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ------------------ | ------ | ------------ | ------ | ----- | ------------------------------------------------ | +| `mqtt-connections` | | | | count | Number of open direct MQTT connections | +| `ws-connections` | | | | count | 
Number of open WebSocket connections | +| `connection` | `mqtt` | `connect` | | % | Percentage of successful direct MQTT connections | +| `connection` | `mqtt` | `disconnect` | | % | Percentage of explicit direct MQTT disconnects | +| `connection` | `ws` | `connect` | | % | Percentage of successful WebSocket connections | +| `connection` | `ws` | `disconnect` | | % | Percentage of explicit WebSocket disconnects | +| `bytes-sent` | topic | mqtt command | `mqtt` | bytes | Bytes sent for a given MQTT command and topic | + +### Replication Metrics + +| `metric` | `path` | `method` | `type` | Unit | Description | +| ---------------- | ------------- | ------------- | --------- | ----- | ----------------------------------- | +| `bytes-sent` | node.database | `replication` | `egress` | bytes | Bytes sent for replication | +| `bytes-sent` | node.database | `replication` | `blob` | bytes | Bytes sent for blob replication | +| `bytes-received` | node.database | `replication` | `ingress` | bytes | Bytes received for replication | +| `bytes-received` | node.database | `replication` | `blob` | bytes | Bytes received for blob replication | + +### Resource Usage Metrics + +| `metric` | Key attributes | Other | Unit | Description | +| ------------------------- | ------------------------------------------------------------------------------------------------ | ------------------- | ------- | --------------------------------------------------------------------------------- | +| `database-size` | `size`, `used`, `free`, `audit` | `database` | bytes | Database file size breakdown | +| `main-thread-utilization` | `idle`, `active`, `taskQueueLatency`, `rss`, `heapTotal`, `heapUsed`, `external`, `arrayBuffers` | `time` | various | Main thread resource usage: idle/active time, queue latency, and memory breakdown | +| `resource-usage` | (see below) | | various | Node.js process resource usage (see [resource-usage](#resource-usage-metric)) | +| `storage-volume` | `available`, `free`, `size` 
| `database` | bytes | Storage volume size breakdown | +| `table-size` | `size` | `database`, `table` | bytes | Table file size | +| `utilization` | | | % | Percentage of time the worker thread was processing requests | + +#### `resource-usage` Metric + +Includes everything returned by Node.js [`process.resourceUsage()`](https://nodejs.org/api/process.html#processresourceusage) (with `userCPUTime` and `systemCPUTime` converted to milliseconds), plus: + +| Field | Unit | Description | +| ---------------- | ---- | ------------------------------------------- | +| `time` | ms | Unix timestamp when the metric was recorded | +| `period` | ms | Duration of the measurement period | +| `cpuUtilization` | % | CPU utilization (user + system combined) | + +## Custom Metrics + +Applications can record custom metrics using the `server.recordAnalytics()` API. See [HTTP API](../http/api.md) for details. + +## Analytics Configuration + +The `analytics.aggregatePeriod` configuration option controls how frequently aggregate summaries are written. See [Configuration Overview](../configuration/overview.md) for details. + +Per-component analytics logging can be configured via `analytics.logging`. See [Logging Configuration](../logging/configuration.md) for details. + +## Related + +- [Analytics Operations](./operations) +- [HTTP API](../http/api.md) +- [Logging Configuration](../logging/configuration.md) +- [Configuration Overview](../configuration/overview.md) diff --git a/reference/cli/authentication.md b/reference/cli/authentication.md new file mode 100644 index 00000000..44a424b7 --- /dev/null +++ b/reference/cli/authentication.md @@ -0,0 +1,237 @@ +--- +title: CLI Authentication +--- + + + + +# CLI Authentication + +The Harper CLI handles authentication differently for local and remote operations. 
+ +## Local Operations + +Available since: v4.1.0 + +For local operations (operations executed on the same machine where Harper is installed), the CLI communicates with Harper via Unix domain sockets instead of HTTP. Domain socket requests are automatically authenticated as the superuser, so no additional authentication parameters are required. + +**Example**: + +```bash +# No authentication needed for local operations +harper describe_database database=dev +harper get_components +harper set_configuration logging_level=info +``` + +When no `target` parameter is specified, the CLI defaults to using the local domain socket connection, providing secure, authenticated access to the local Harper instance. + +## Remote Operations + +Available since: v4.1.0; expanded in: v4.3.0 + +For remote operations (operations executed on a remote Harper instance via the `target` parameter), you must provide authentication credentials. + +### Authentication Methods + +#### Method 1: Environment Variables (Recommended) + +Set the following environment variables to avoid exposing credentials in command history: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=password +``` + +Then execute remote operations without including credentials in the command: + +```bash +harper describe_database database=dev target=https://server.com:9925 +harper get_components target=https://remote-instance.example.com:9925 +``` + +**Benefits**: + +- Credentials not visible in command history +- More secure for scripting +- Can be set once per session +- Supported by most CI/CD systems + +**Example Script**: + +```bash +#!/bin/bash + +# Set credentials from secure environment +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD # from secret manager + +# Execute operations +harper deploy target=https://prod-server.com:9925 replicated=true +harper restart target=https://prod-server.com:9925 replicated=true +``` + +#### Method 2: Command Parameters + 
+Provide credentials directly as command parameters: + +```bash +harper describe_database \ + database=dev \ + target=https://server.com:9925 \ + username=HDB_ADMIN \ + password=password +``` + +**Parameters**: + +- `username=` - Harper admin username +- `password=` - Harper admin password + +**Cautions**: + +- Credentials visible in command history +- Less secure for production environments +- Exposed in process listings +- Not recommended for scripts + +### Target Parameter + +The `target` parameter specifies the full HTTP/HTTPS URL of the remote Harper instance: + +**Format**: `target=://:` + +**Examples**: + +```bash +# HTTPS on default operations API port +target=https://server.example.com:9925 + +# HTTP (not recommended for production) +target=http://localhost:9925 + +# Custom port +target=https://server.example.com:8080 +``` + +## Security Best Practices + +### 1. Use Environment Variables + +Always use environment variables for credentials in scripts and automation: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD +``` + +### 2. Use HTTPS + +Always use HTTPS for remote operations to encrypt credentials in transit: + +```bash +# Good +target=https://server.com:9925 + +# Bad - credentials sent in plain text +target=http://server.com:9925 +``` + +### 3. Manage Secrets Securely + +Store credentials in secure secret management systems: + +- Environment variables from secret managers (AWS Secrets Manager, HashiCorp Vault, etc.) +- CI/CD secret storage (GitHub Secrets, GitLab CI Variables, etc.) 
+- Operating system credential stores + +**Example with AWS Secrets Manager**: + +```bash +#!/bin/bash + +# Retrieve credentials from AWS Secrets Manager +export CLI_TARGET_USERNAME=$(aws secretsmanager get-secret-value \ + --secret-id harper-admin-user \ + --query SecretString \ + --output text) + +export CLI_TARGET_PASSWORD=$(aws secretsmanager get-secret-value \ + --secret-id harper-admin-password \ + --query SecretString \ + --output text) + +# Execute operations +harper deploy target=https://prod.example.com:9925 +``` + +### 4. Use Least Privilege + +Create dedicated users with minimal required permissions for CLI operations instead of using the main admin account. See [Users and Roles](../users-and-roles/overview.md) for more information. + +### 5. Rotate Credentials + +Regularly rotate credentials, especially for automated systems and CI/CD pipelines. + +### 6. Audit Access + +Monitor and audit CLI operations, especially for production environments. See [Logging](../logging/overview.md) for more information on logging. + +## Troubleshooting + +### Authentication Failures + +If you receive authentication errors: + +1. **Verify credentials are correct**: + - Check username and password + - Ensure no extra whitespace + +2. **Verify the target URL**: + - Ensure the URL is correct and reachable + - Check the port number + - Verify HTTPS/HTTP protocol + +3. **Check network connectivity**: + + ```bash + curl https://server.com:9925 + ``` + +4. **Verify user permissions**: + - Ensure the user has permission to execute the operation + - Check user roles and permissions + +### Environment Variable Issues + +If environment variables aren't working: + +1. **Verify variables are set**: + + ```bash + echo $CLI_TARGET_USERNAME + echo $CLI_TARGET_PASSWORD + ``` + +2. 
**Export variables**: + Ensure you used `export`, not just assignment: + + ```bash + # Wrong - variable only available in current shell + CLI_TARGET_USERNAME=admin + + # Correct - variable available to child processes + export CLI_TARGET_USERNAME=admin + ``` + +3. **Check variable scope**: + - Variables must be exported before running commands + - Variables set in one terminal don't affect other terminals + +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [CLI Commands](./commands.md) - Core CLI commands +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [Security Overview](../security/overview.md) - Harper security features +- [Users and Roles](../users-and-roles/overview.md) - User management diff --git a/reference/cli/commands.md b/reference/cli/commands.md new file mode 100644 index 00000000..3f1fdc94 --- /dev/null +++ b/reference/cli/commands.md @@ -0,0 +1,269 @@ +--- +title: CLI Commands +--- + + + + + + +# CLI Commands + +This page documents the core Harper CLI commands for managing Harper instances. For Operations API commands available through the CLI, see [Operations API Commands](./operations-api-commands.md). + +## Process Management Commands + +### `harper` + +Added in: v4.1.0 + +Run Harper in the foreground as a standard process. This is the recommended way to run Harper. 
```bash +harper +``` + +When you run `harper`: + +- If Harper is not installed, it will guide you through the installation process +- Once installed, it runs Harper in the foreground as a standard process, compatible with systemd, Docker, and other process management tools + +**First-Time Installation**: + +If Harper is not installed, you can provide configuration parameters via environment variables or command line arguments: + +**Using Environment Variables**: + +```bash +# Minimum required parameters for no additional CLI prompts +export TC_AGREEMENT=yes +export HDB_ADMIN_USERNAME=HDB_ADMIN +export HDB_ADMIN_PASSWORD=password +export ROOTPATH=/hdb/ +harper +``` + +:::note +If you specify `DEFAULT_MODE=dev` you will also need to specify the `REPLICATION_HOSTNAME=localhost` +::: + +**Using Command Line Arguments**: + +```bash +# Minimum required parameters for no additional CLI prompts +harper \ + --TC_AGREEMENT=yes \ + --HDB_ADMIN_USERNAME=HDB_ADMIN \ + --HDB_ADMIN_PASSWORD=password \ + --ROOTPATH='/hdb' +``` + +**Note**: When used in conjunction, command line arguments override environment variables. See [Configuration](../configuration/overview.md) for a full list of configuration parameters. + +:::info +For more information on installation, see [Getting Started / Install and Connect Harper](/learn/getting-started/install-and-connect-harper). +::: + +### `harper run` + +Added in: v4.2.0 + +Run a Harper application from any location as a foreground, standard process (similar to `harper`). + +```bash +harper run /path/to/app +``` + +This command runs Harper with the specified application directory without automatic reloading or dev-specific features. + +### `harper dev` + +Added in: v4.2.0 + +Run Harper in development mode from a specified directory with automatic reloading. Recommended for local application development. Operates similarly to `harper` and `harper run`. 
+ +```bash +harper dev /path/to/app +``` + +**Features**: + +- Pushes logs to standard streams automatically +- Uses a single thread for simpler debugging +- Auto-restart on file changes + +### `harper restart` + +Available since: v4.1.0 + +Restart a running Harper instance regardless of whether it is a foreground (`harper`, `harper run`, or `harper dev`) or background (`harper start`) process. + +```bash +harper restart +``` + +### `harper start` + +Available since: v4.1.0 + +Start Harper in background (daemon mode). + +```bash +harper start +``` + +After installation, this command launches Harper as a background process. Remember that the Harper PID is available in a `hdb.pid` file within the installation directory. + +### `harper stop` + +Available since: v4.1.0 + +Stop a running Harper instance. + +```bash +harper stop +``` + +## Installation Commands + +### `harper install` + +Available since: v4.1.0 + +Install Harper with interactive prompts or automated configuration. + +```bash +harper install +``` + +The `harper install` command operates exactly like the [`harper`](#harper) command, but exits as soon as the installation completes. See the [`harper`](#harper) command documentation above for details on providing configuration parameters via environment variables or command line arguments. + +**Note**: We recommend using `harper` instead of `harper install` as it provides a consistent workflow for both installation and running Harper. + +## Information Commands + +### `harper version` + +Available since: v4.1.0 + +Display the installed Harper version. + +```bash +harper version +``` + +**Example Output**: + +``` +4.7.0 +``` + +### `harper status` + +Available since: v4.1.0 + +Display the status of Harper and clustering. + +```bash +harper status +``` + +Shows: + +- Harper process status +- Clustering network status +- Replication statuses + +In Harper versions where NATS is supported, this command also shows the clustering hub and leaf processes. 
+ +### `harper help` + +Available since: v4.1.0 + +Display all available Harper CLI commands with brief descriptions. + +```bash +harper help +``` + +## Maintenance Commands + +### `harper renew-certs` + +Available since: v4.1.0 + +Renew Harper-generated self-signed certificates. + +```bash +harper renew-certs +``` + +This command regenerates the self-signed SSL/TLS certificates used by Harper. + +### `harper copy-db` + +Available since: v4.1.0 + +Copy a Harper database with compaction to eliminate free-space and fragmentation. + +```bash +harper copy-db <database> <target_path> +``` + +**Parameters**: + +- `<database>` - Name of the source database +- `<target_path>` - Full path to the target database file + +**Example**: + +```bash +harper copy-db data /home/user/hdb/database/copy.mdb +``` + +This copies the default `data` database to a new location with compaction applied. + +**Use Cases**: + +- Database optimization +- Eliminating fragmentation +- Creating compacted backups +- Reclaiming free space + +See also: [Database Compaction](../database/compaction.md) for more information. + +#### How Backups Work + +Harper uses a transactional commit process that ensures data on disk is always transactionally consistent with storage. This means Harper maintains database integrity in the event of a crash and allows you to use standard volume snapshot tools to make backups. + +**Backup Process**: + +Database files are stored in the `hdb/database` directory. As long as the snapshot is an atomic snapshot of these database files, the data can be copied/moved back into the database directory to restore a previous backup (with Harper shut down), and database integrity will be preserved. 
+ +**Important Notes**: + +- **Atomic Snapshots**: Use volume snapshot tools that create atomic snapshots +- **Not Safe**: Simply copying an in-use database file using `cp` is **not reliable** + - Progressive reads occur at different points in time + - Results in an unreliable copy that likely won't be usable +- **Safe Copying**: Standard file copying is only reliable for database files that are **not in use** + +**Recommended Backup Tools**: + +- LVM snapshots +- ZFS snapshots +- BTRFS snapshots +- Cloud provider volume snapshots (AWS EBS, Azure Disk, GCP Persistent Disk) +- Enterprise backup solutions with snapshot capabilities + +## Remote Operations + +The CLI supports executing commands on remote Harper instances. For details, see [CLI Overview - Remote Operations](./overview.md#remote-operations). + +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [CLI Authentication](./authentication.md) - Authentication mechanisms +- [Configuration](../configuration/overview.md) - Configuration parameters for installation +- [Database Compaction](../database/compaction.md) - More on database compaction diff --git a/reference/cli/operations-api-commands.md b/reference/cli/operations-api-commands.md new file mode 100644 index 00000000..bdfd49f5 --- /dev/null +++ b/reference/cli/operations-api-commands.md @@ -0,0 +1,380 @@ +--- +title: Operations API Commands +--- + + + + +# Operations API Commands + +Added in: v4.3.0 + +The Harper CLI supports executing operations from the [Operations API](../operations-api/overview.md) directly from the command line. This enables powerful automation and scripting capabilities. + +## General Syntax + +```bash +harper <operation> <parameter>=<value> +``` + +**Output Format**: + +- Default: YAML +- JSON: Pass `json=true` as a parameter + +## Supported Operations + + + +The following operations are available through the CLI. 
Operations that require complex nested parameters or object structures are not supported via CLI and must be executed through the HTTP API. + +### Complete Operations List + +:::note +This is just a brief overview of all operations available as CLI commands. Review the respective operation documentation for more information on available arguments and expected behavior. Keep in mind that all operations options are converted to CLI arguments in the same way (using `snake_case`). +::: + +| Operation | Description | Category | Available Since | +| -------------------------------- | ------------------------------------- | ---------------------------------------------------------------------- | --------------- | +| `describe_table` | Describe table structure and metadata | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `describe_all` | Describe all databases and tables | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `describe_database` | Describe database structure | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_database` | Create a new database | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_database` | Delete a database | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_table` | Create a new table | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_table` | Delete a table | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `create_attribute` | Create a table attribute | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `drop_attribute` | Delete a table attribute | [Database](../operations-api/operations.md#databases--tables) | v4.3.0 | +| `search_by_id` | Search records by ID | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `search_by_value` | Search records by attribute value | 
[Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `insert` | Insert new records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `update` | Update existing records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `upsert` | Insert or update records | [Data](../operations-api/operations.md#nosql-operations) | v4.4.9 | +| `delete` | Delete records | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `sql` | Execute SQL queries | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `csv_file_load` | Load data from CSV file | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `csv_url_load` | Load data from CSV URL | [Data](../operations-api/operations.md#nosql-operations) | v4.3.0 | +| `list_users` | List all users | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `add_user` | Create a new user | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `alter_user` | Modify user properties | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `drop_user` | Delete a user | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `list_roles` | List all roles | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `drop_role` | Delete a role | [Security](../operations-api/operations.md#certificate-management) | v4.3.0 | +| `create_csr` | Create certificate signing request | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `sign_certificate` | Sign a certificate | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `list_certificates` | List SSL/TLS certificates | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `add_certificate` | Add SSL/TLS certificate | 
[Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `remove_certificate` | Remove SSL/TLS certificate | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `add_ssh_key` | Add SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `get_ssh_key` | Get SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.7.2 | +| `update_ssh_key` | Update SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `delete_ssh_key` | Delete SSH key | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `list_ssh_keys` | List all SSH keys | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `set_ssh_known_hosts` | Set SSH known hosts | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `get_ssh_known_hosts` | Get SSH known hosts | [Security](../operations-api/operations.md#certificate-management) | v4.4.0 | +| `cluster_get_routes` | Get cluster routing information | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `cluster_network` | Get cluster network status | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `cluster_status` | Get cluster status | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `remove_node` | Remove node from cluster | [Clustering](../operations-api/operations.md#replication--clustering) | v4.3.0 | +| `add_component` | Add a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `deploy_component` | Deploy a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `deploy` (alias) | Alias for `deploy_component` | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `package_component` | Package a component | 
[Components](../operations-api/operations.md#components) | v4.3.0 | +| `package` (alias) | Alias for `package_component` | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `drop_component` | Remove a component | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `get_components` | List all components | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `get_component_file` | Get component file contents | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `set_component_file` | Set component file contents | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `install_node_modules` | Install Node.js dependencies | [Components](../operations-api/operations.md#components) | v4.3.0 | +| `set_configuration` | Update configuration settings | [Configuration](../operations-api/operations.md#configuration) | v4.3.0 | +| `get_configuration` | Get current configuration | [Configuration](../operations-api/operations.md#configuration) | v4.3.0 | +| `create_authentication_tokens` | Create authentication tokens | [Authentication](../operations-api/operations.md#token-authentication) | v4.3.0 | +| `refresh_operation_token` | Refresh operation token | [Authentication](../operations-api/operations.md#token-authentication) | v4.3.0 | +| `restart_service` | Restart Harper service | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `restart` | Restart Harper instance | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `system_information` | Get system information | [System](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `registration_info` | Get registration information | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `get_fingerprint` | Get instance fingerprint | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `set_license` | Set 
license key | [Licensing](../operations-api/operations.md#registration--licensing) | v4.3.0 | +| `get_usage_licenses` | Get usage and license info | [Licensing](../operations-api/operations.md#registration--licensing) | v4.7.3 | +| `get_job` | Get job status | [Jobs](../operations-api/operations.md#jobs) | v4.3.0 | +| `search_jobs_by_start_date` | Search jobs by start date | [Jobs](../operations-api/operations.md#jobs) | v4.3.0 | +| `read_log` | Read application logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `read_transaction_log` | Read transaction logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `read_audit_log` | Read audit logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `delete_transaction_logs_before` | Delete old transaction logs | [Logging](../operations-api/operations.md#logs) | v4.3.0 | +| `purge_stream` | Purge streaming data | [Maintenance](../operations-api/operations.md#jobs) | v4.3.0 | +| `delete_records_before` | Delete old records | [Maintenance](../operations-api/operations.md#jobs) | v4.3.0 | +| `get_status` | Get custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | +| `set_status` | Set custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | +| `clear_status` | Clear custom status information | [Status](../operations-api/operations.md#registration--licensing) | v4.6.0 | + +### Command Aliases + +The following aliases are available for convenience: + +- `deploy` → `deploy_component` +- `package` → `package_component` + +For detailed parameter information for each operation, see the [Operations API documentation](../operations-api/operations.md). 
+ +## Command Examples + +### Database Operations + +**Describe a database**: + +```bash +harper describe_database database=dev +``` + +**Describe a table** (with YAML output): + +```bash +harper describe_table database=dev table=dog +``` + +**Example Output**: + +```yaml +schema: dev +name: dog +hash_attribute: id +audit: true +schema_defined: false +attributes: + - attribute: id + is_primary_key: true + - attribute: name + indexed: true +clustering_stream_name: 3307bb542e0081253klnfd3f1cf551b +record_count: 10 +last_updated_record: 1724483231970.9949 +``` + +:::tip +For detailed information on database and table structures, see the [Database Reference](../database/overview.md). +::: + +### Data Operations + +**Search by ID** (with JSON output): + +```bash +harper search_by_id database=dev table=dog ids='["1"]' get_attributes='["*"]' json=true +``` + +**Search by value**: + +```bash +harper search_by_value table=dog search_attribute=name search_value=harper get_attributes='["id", "name"]' +``` + +:::tip +For more information on querying data, see the [REST Reference](../rest/overview.md) and [GraphQL Querying](../graphql-querying/overview.md). +::: + +### Configuration Operations + +**Set configuration**: + +```bash +harper set_configuration logging_level=error +``` + +**Get configuration**: + +```bash +harper get_configuration +``` + +:::tip +For comprehensive configuration options, see the [Configuration Reference](../configuration/overview.md). +::: + +### Component Operations + +**Deploy a component**: + +```bash +harper deploy_component project=my-cool-app package=https://github.com/HarperDB/application-template +``` + +**Get all components**: + +```bash +harper get_components +``` + +**Note**: `deploy` is an alias for `deploy_component`: + +```bash +harper deploy project=my-app package=https://github.com/user/repo +``` + +:::tip +For more information on components and applications, see the [Components Reference](../components/overview.md). 
+::: + +### User and Role Operations + +**List users**: + +```bash +harper list_users +``` + +**List roles**: + +```bash +harper list_roles +``` + +:::tip +For detailed information on users, roles, and authentication, see the [Security Reference](../security/overview.md). +::: + +## Remote Operations + +All CLI operations can be executed on remote Harper instances. See [CLI Overview - Remote Operations](./overview.md#remote-operations) for details on authentication and remote execution. + +### Remote Component Deployment + +When using remote operations, you can deploy a local component or application to the remote instance. + +**Deploy current directory**: + +If you omit the `package` parameter, the current directory will be packaged and deployed: + +```bash +harper deploy target=https://server.com:9925 +``` + +**Note**: `deploy` is an alias for `deploy_component`. + +**Deploy to clustered environment**: + +For clustered environments, use the `replicated=true` parameter to ensure the deployment is replicated to all nodes: + +```bash +harper deploy target=https://server.com:9925 replicated=true +``` + +**Restart after deployment** (with replication): + +After deploying to a clustered environment, restart all nodes to apply changes: + +```bash +harper restart target=https://server.com:9925 replicated=true +``` + +For more information on Harper applications and components, see: + +- [Components](../components/overview.md) - Application architecture and structure +- [Deploying Harper Applications](/learn/getting-started/install-and-connect-harper) - Step-by-step deployment guide + +## Parameter Formatting + +### String Parameters + +Simple string values can be passed directly: + +```bash +harper describe_table database=dev table=dog +``` + +### Array Parameters + +Array parameters must be quoted and formatted as JSON: + +```bash +harper search_by_id database=dev table=dog ids='["1","2","3"]' +``` + +### Object Parameters + +Object parameters are not supported via CLI. 
For operations requiring complex nested objects, use: + +- The [Operations API](../operations-api/overview.md) via HTTP +- A custom script or tool + +### Boolean Parameters + +Boolean values can be passed as strings: + +```bash +harper get_configuration json=true +harper deploy target=https://server.com:9925 replicated=true +``` + +## Output Formatting + +### YAML (Default) + +By default, CLI operation results are formatted as YAML for readability: + +```bash +harper describe_table database=dev table=dog +``` + +### JSON + +Pass `json=true` to get JSON output (useful for scripting): + +```bash +harper describe_table database=dev table=dog json=true +``` + +## Scripting and Automation + +The Operations API commands through the CLI are ideal for: + +- Build and deployment scripts +- Automation workflows +- CI/CD pipelines +- Administrative tasks +- Monitoring and health checks + +**Example Script**: + +```bash +#!/bin/bash + +# Deploy component to remote cluster +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=$SECURE_PASSWORD + +harper deploy \ + target=https://cluster-node-1.example.com:9925 \ + replicated=true \ + package=https://github.com/myorg/my-component + +# Restart the cluster +harper restart \ + target=https://cluster-node-1.example.com:9925 \ + replicated=true + +# Check status +harper get_components \ + target=https://cluster-node-1.example.com:9925 \ + json=true +``` + +## Limitations + +The following operation types are **not supported** via CLI: + +- Operations requiring complex nested JSON structures +- Operations with array-of-objects parameters +- File upload operations +- Streaming operations + +For these operations, use the [Operations API](../operations-api/overview.md) directly via HTTP. 
+ +## See Also + +- [CLI Overview](./overview.md) - General CLI information +- [CLI Commands](./commands.md) - Core CLI commands +- [Operations API Overview](../operations-api/overview.md) - Operations API documentation +- [Operations API Reference](../operations-api/operations.md) - Complete operations list +- [CLI Authentication](./authentication.md) - Authentication details diff --git a/reference/cli/overview.md b/reference/cli/overview.md new file mode 100644 index 00000000..5571becc --- /dev/null +++ b/reference/cli/overview.md @@ -0,0 +1,200 @@ +--- +title: Harper CLI Overview +--- + + + + + + + +# Harper CLI Overview + +The Harper command line interface (CLI) is used to administer self-installed Harper instances. + +## Installation + +Available since: v4.1.0 + +Harper is typically installed globally via npm: + +```bash +npm i -g harperdb +``` + +The installation includes the Harper CLI, which provides comprehensive management capabilities for local and remote Harper instances. + +For detailed installation instructions, see the [Getting Started / Install And Connect Harper](https://docs.harperdb.io/docs/getting-started/install-and-connect-harper) guide. + +## Command Name + +Changed in: v4.7.0 + +The CLI command is `harper`. From v4.1.0 to v4.6.x, the command was only available as `harperdb`. Starting in v4.7.0, the preferred command is `harper`, though `harperdb` continues to work as an alias for backward compatibility. + +**Examples**: + +```bash +# Modern usage (v4.7.0+) +harper +harper describe_table database=dev table=dog + +# Legacy usage (still supported) +harperdb +harperdb describe_table database=dev table=dog +``` + +All examples in this documentation use `harper`. + +## General Usage + +The primary way to use Harper is to run the `harper` command. 
When you run `harper`: + +- If Harper is not installed, it will guide you through the installation process +- Once installed, it runs Harper in the foreground as a standard process +- This makes it compatible with systemd, Docker, and other process management tools +- Output logs directly to the console for easy monitoring + +The CLI supports two main categories of commands: + +1. **System Commands** - Core Harper management commands (start, stop, restart, status, etc.) +2. **Operations API Commands** - Execute operations from the Operations API directly via the CLI + +Both system and operations commands can be executed on local or remote Harper instances. For remote operations, authentication credentials can be provided via command parameters or environment variables. + +### CLI Installation Targeting + +By default, the CLI targets the Harper installation path stored in `~/.harperdb/hdb_boot_properties.file`. You can override this to target a specific Harper installation by specifying the `--ROOTPATH` command line argument or the `ROOTPATH` environment variable. + +**Example: Target a specific installation**: + +```bash +# Using command line argument +harper status --ROOTPATH /custom/path/to/hdb + +# Using environment variable +export ROOTPATH=/custom/path/to/hdb +harper status +``` + +### Process ID File + +When Harper is running, the process identifier (PID) is stored in a file named `hdb.pid` located in the Harper installation directory. This file can be used by external process management tools or scripts to monitor or manage the Harper process. 
+ +**Location**: `<ROOTPATH>/hdb.pid` + +**Example**: + +```bash +# Read the PID +cat /path/to/hdb/hdb.pid + +# Use with external tools +kill -0 $(cat /path/to/hdb/hdb.pid) # Check if process is running +``` + +## System Management Commands + +| Command | Description | Available Since | +| ----------------------------------------- | ------------------------------------------------------------ | --------------- | +| `harper` | Run Harper in foreground mode (default behavior) | v4.1.0 | +| `harper run <path>` | Run Harper application from any directory | v4.2.0 | +| `harper dev <path>` | Run Harper in dev mode with auto-restart and console logging | v4.2.0 | +| `harper restart` | Restart Harper | v4.1.0 | +| `harper start` | Start Harper in background (daemon mode) | v4.1.0 | +| `harper stop` | Stop a running Harper instance | v4.1.0 | +| `harper status` | Display Harper and clustering status | v4.1.0 | +| `harper version` | Show installed Harper version | v4.1.0 | +| `harper renew-certs` | Renew Harper-generated self-signed certificates | v4.1.0 | +| `harper copy-db <database> <target_path>` | Copy a database with compaction | v4.1.0 | +| `harper help` | Display all available CLI commands | v4.1.0 | + +See [CLI Commands](./commands.md) for detailed documentation on each command. + +## Operations API Commands + +Added in: v4.3.0 + +The Harper CLI supports executing most operations from the [Operations API](../operations-api/overview.md) directly from the command line. This includes operations that do not require complex nested parameters. + +**Syntax**: `harper <operation> <parameter>=<value>` + +**Output Format**: Results are formatted as YAML by default. Pass `json=true` for JSON output. 
+ +**Examples**: + +```bash +# Describe a table +harper describe_table database=dev table=dog + +# Set configuration +harper set_configuration logging_level=error + +# Deploy a component +harper deploy_component project=my-app package=https://github.com/user/repo + +# Get all components +harper get_components + +# Search by ID (JSON output) +harper search_by_id database=dev table=dog ids='["1"]' json=true + +# SQL query +harper sql sql='select * from dev.dog where id="1"' +``` + +See [Operations API Commands](./operations-api-commands.md) for the complete list of available operations. + +## Remote Operations + +Changed in: v4.3.0 (expanded remote operations support) + +The CLI can execute operations on remote Harper instances by passing the `target` parameter with the HTTP address of the remote instance. + +**Authentication**: Provide credentials via: + +- Parameters: `username= password=` +- Environment variables: `CLI_TARGET_USERNAME` and `CLI_TARGET_PASSWORD` + +See [CLI Authentication](./authentication.md) for detailed information on authentication methods and best practices. + +**Example: CLI Target Environment Variables**: + +```bash +export CLI_TARGET_USERNAME=HDB_ADMIN +export CLI_TARGET_PASSWORD=password +harper describe_database database=dev target=https://server.com:9925 +``` + +**Example: CLI Options**: + +```bash +harper describe_database database=dev target=https://server.com:9925 username=HDB_ADMIN password=password +``` + +## Development Mode + +Added in: v4.2.0 + +For local application and component development, use `harper dev`: + +```bash +harper dev /path/to/app +``` + +**Features**: + +- Console logging for immediate feedback +- Debugging enabled +- Auto-restart on file changes +- Ideal for rapid iteration during development + +See [CLI Commands](./commands.md) for detailed information on `harper dev` and other development commands. 
+ +## See Also + +- [CLI Commands](./commands.md) - Detailed reference for each CLI command +- [Operations API Commands](./operations-api-commands.md) - Operations available through CLI +- [CLI Authentication](./authentication.md) - Authentication mechanisms +- [Configuration](../configuration/overview.md) - Harper configuration options +- [Operations API](../operations-api/overview.md) - Full operations API reference diff --git a/reference/components/applications.md b/reference/components/applications.md new file mode 100644 index 00000000..5df64215 --- /dev/null +++ b/reference/components/applications.md @@ -0,0 +1,336 @@ +--- +title: Applications +--- + + + + + + + +# Applications + +> The contents of this page primarily relate to **application** components. The term "components" in the Operations API and CLI generally refers to applications specifically. See the [Components Overview](./overview.md) for a full explanation of terminology. + +Harper offers several approaches to managing applications that differ between local development and remote Harper instances. + +## Local Development + +### `dev` and `run` Commands + +Added in: v4.2.0 + +The quickest way to run an application locally is with the `dev` command inside the application directory: + +```sh +harperdb dev . +``` + +The `dev` command watches for file changes and restarts Harper worker threads automatically. + +The `run` command is similar but does not watch for changes. Use `run` when the main thread needs to be restarted (the `dev` command does not restart the main thread). + +Stop either process with SIGINT (Ctrl+C). + +### Deploying to a Local Harper Instance + +To mimic interaction with a hosted Harper instance locally: + +1. Start Harper: `harperdb` +2. Deploy the application: + + ```sh + harperdb deploy \ + project= \ + package= \ + restart=true + ``` + + - Omit `target` to deploy to the locally running instance. 
+ - Setting `package=` creates a symlink so file changes are picked up automatically between restarts. + - `restart=true` restarts worker threads after deploy. Use `restart=rolling` for a rolling restart. + +3. Use `harperdb restart` in another terminal to restart threads at any time. +4. Remove an application: `harperdb drop_component project=` + +> Not all [component operations](#operations-api) are available via CLI. When in doubt, use the Operations API via direct HTTP requests to the local Harper instance. + +Example: + +```sh +harperdb deploy \ + project=test-application \ + package=/Users/dev/test-application \ + restart=true +``` + +> Use `package=$(pwd)` if your current directory is the application directory. + +## Remote Management + +Managing applications on a remote Harper instance uses the same operations as local management. The key difference is specifying a `target` along with credentials: + +```sh +harperdb deploy \ + project= \ + package= \ + username= \ + password= \ + target= \ + restart=true \ + replicated=true +``` + +Credentials can also be provided via environment variables: + +```sh +export CLI_TARGET_USERNAME= +export CLI_TARGET_PASSWORD= +harperdb deploy \ + project= \ + package= \ + target= \ + restart=true \ + replicated=true +``` + +### Package Sources + +When deploying remotely, the `package` field can be any valid npm dependency value: + +- **Omit** `package` to package and deploy the current local directory +- **npm package**: `package="@harperdb/status-check"` +- **GitHub**: `package="HarperDB/status-check"` or `package="https://github.com/HarperDB/status-check"` +- **Private repo (SSH)**: `package="git+ssh://git@github.com:HarperDB/secret-app.git"` +- **Tarball**: `package="https://example.com/application.tar.gz"` + +When using git tags, use the `semver` directive for reliable versioning: + +``` +HarperDB/application-template#semver:v1.0.0 +``` + +Harper generates a `package.json` from component configurations and uses a form of 
`npm install` to resolve them. This is why specifying a local file path creates a symlink (changes are picked up between restarts without redeploying). + +For SSH-based private repos, use the [Add SSH Key](#add_ssh_key) operation to register keys first. + +## Dependency Management + +Harper uses `npm` and `package.json` for dependency management. + +During application loading, Harper follows this resolution order to determine how to install dependencies: + +1. If `node_modules` exists, or if `package.json` is absent — skip installation +2. Check the application's `harperdb-config.yaml` for `install: { command, timeout }` fields +3. Derive the package manager from [`package.json#devEngines#packageManager`](https://docs.npmjs.com/cli/v10/configuring-npm/package-json#devengines) +4. Default to `npm install` + +The `add_component` and `deploy_component` operations support `install_command` and `install_timeout` fields for customizing this behavior. + +### Example `harperdb-config.yaml` with Custom Install + +```yaml +myApp: + package: ./my-app + install: + command: yarn install + timeout: 600000 # 10 minutes +``` + +### Example `package.json` with `devEngines` + +```json +{ + "name": "my-app", + "version": "1.0.0", + "devEngines": { + "packageManager": { + "name": "pnpm", + "onFail": "error" + } + } +} +``` + +> If you plan to use an alternative package manager, ensure it is installed on the host machine. Harper does not support the `"onFail": "download"` option and falls back to `"onFail": "error"` behavior. + +## Advanced: Direct `harperdb-config.yaml` Configuration + +Applications can be added to Harper by adding them directly to `harperdb-config.yaml` (located in the Harper `rootPath`, typically `~/hdb`). + +```yaml +status-check: + package: '@harperdb/status-check' +``` + +The entry name does not need to match a `package.json` dependency. Harper transforms these entries into a `package.json` and runs `npm install`. 
+ +Any valid npm dependency specifier works: + +```yaml +myGithubComponent: + package: HarperDB-Add-Ons/package#v2.2.0 +myNPMComponent: + package: harperdb +myTarBall: + package: /Users/harper/cool-component.tar +myLocal: + package: /Users/harper/local +myWebsite: + package: https://harperdb-component +``` + +Harper generates a `package.json` and installs all components into `` (default: `~/hdb/components`). A symlink back to `/node_modules` is created for dependency resolution. + +> Use `harperdb get_configuration` to find the `rootPath` and `componentsRoot` values on your instance. + +## Operations API + +Component operations are restricted to `super_user` roles. + +### `add_component` + +Creates a new component project in the component root directory using a template. + +- `project` _(required)_ — Name of the project to create +- `template` _(optional)_ — Git URL of a template repository. Defaults to `https://github.com/HarperFast/application-template` +- `install_command` _(optional)_ — Install command. Defaults to `npm install` +- `install_timeout` _(optional)_ — Install timeout in milliseconds. Defaults to `300000` (5 minutes) +- `replicated` _(optional)_ — Replicate to all cluster nodes + +```json +{ + "operation": "add_component", + "project": "my-component" +} +``` + +### `deploy_component` + +Deploys a component using a package reference or a base64-encoded `.tar` payload. + +- `project` _(required)_ — Name of the project +- `package` _(optional)_ — Any valid npm reference (GitHub, npm, tarball, local path, URL) +- `payload` _(optional)_ — Base64-encoded `.tar` file content +- `force` _(optional)_ — Allow deploying over protected core components. 
Defaults to `false` +- `restart` _(optional)_ — `true` for immediate restart, `'rolling'` for sequential cluster restart +- `replicated` _(optional)_ — Replicate to all cluster nodes +- `install_command` _(optional)_ — Install command override +- `install_timeout` _(optional)_ — Install timeout override in milliseconds + +```json +{ + "operation": "deploy_component", + "project": "my-component", + "package": "HarperDB/application-template#semver:v1.0.0", + "replicated": true, + "restart": "rolling" +} +``` + +### `drop_component` + +Deletes a component project or a specific file within it. + +- `project` _(required)_ — Project name +- `file` _(optional)_ — Path relative to project folder. If omitted, deletes the entire project +- `replicated` _(optional)_ — Replicate deletion to all cluster nodes +- `restart` _(optional)_ — Restart Harper after dropping + +```json +{ + "operation": "drop_component", + "project": "my-component" +} +``` + +### `package_component` + +Packages a project folder as a base64-encoded `.tar` string. + +- `project` _(required)_ — Project name +- `skip_node_modules` _(optional)_ — Exclude `node_modules` from the package + +```json +{ + "operation": "package_component", + "project": "my-component", + "skip_node_modules": true +} +``` + +### `get_components` + +Returns all local component files, folders, and configuration from `harperdb-config.yaml`. + +```json +{ + "operation": "get_components" +} +``` + +### `get_component_file` + +Returns the contents of a file within a component project. + +- `project` _(required)_ — Project name +- `file` _(required)_ — Path relative to project folder +- `encoding` _(optional)_ — File encoding. Defaults to `utf8` + +```json +{ + "operation": "get_component_file", + "project": "my-component", + "file": "resources.js" +} +``` + +### `set_component_file` + +Creates or updates a file within a component project. 
+ +- `project` _(required)_ — Project name +- `file` _(required)_ — Path relative to project folder +- `payload` _(required)_ — File content to write +- `encoding` _(optional)_ — File encoding. Defaults to `utf8` +- `replicated` _(optional)_ — Replicate update to all cluster nodes + +```json +{ + "operation": "set_component_file", + "project": "my-component", + "file": "test.js", + "payload": "console.log('hello world')" +} +``` + +### SSH Key Management + +For deploying from private repositories, SSH keys must be registered on the Harper instance. + +#### `add_ssh_key` + +- `name` _(required)_ — Key name +- `key` _(required)_ — Private key contents (must be ed25519; use `\n` for line breaks with trailing `\n`) +- `host` _(required)_ — Host alias for SSH config (used in `package` URL) +- `hostname` _(required)_ — Actual domain (e.g., `github.com`) +- `known_hosts` _(optional)_ — Public SSH keys of the host. Auto-retrieved for `github.com` +- `replicated` _(optional)_ — Replicate to all cluster nodes + +```json +{ + "operation": "add_ssh_key", + "name": "my-key", + "key": "-----BEGIN OPENSSH PRIVATE KEY-----\n...\n-----END OPENSSH PRIVATE KEY-----\n", + "host": "my-key.github.com", + "hostname": "github.com" +} +``` + +After adding a key, use the configured host in deploy package URLs: + +``` +"package": "git+ssh://git@my-key.github.com:my-org/my-repo.git#semver:v1.0.0" +``` + +Additional SSH key operations: `update_ssh_key`, `delete_ssh_key`, `list_ssh_keys`, `set_ssh_known_hosts`, `get_ssh_known_hosts`. diff --git a/reference/components/extension-api.md b/reference/components/extension-api.md new file mode 100644 index 00000000..5d729cf6 --- /dev/null +++ b/reference/components/extension-api.md @@ -0,0 +1,186 @@ +--- +title: Extension API +--- + + + + +# Extension API + +> As of Harper v4.6, a new iteration of the extension system called **Plugins** was released. Plugins simplify the API and are recommended for new extension development. 
See the [Plugin API](./plugin-api.md) reference. Both extensions and plugins are supported; extensions are not yet deprecated. + +Extensions are components that provide reusable building blocks for applications. There are two key types: + +- **Resource Extensions** — Handle specific files or directories +- **Protocol Extensions** — More advanced extensions that can return a Resource Extension; primarily used for implementing higher-level protocols and custom networking handlers + +An extension is distinguished from a plain component by implementing one or more of the Resource Extension or Protocol Extension API methods. + +## Declaring an Extension + +All extensions must define a `config.yaml` with an `extensionModule` option pointing to the extension source code (path resolves from the module root directory): + +```yaml +extensionModule: ./extension.js +``` + +If written in TypeScript or another compiled language, point to the built output: + +```yaml +extensionModule: ./dist/index.js +``` + +## Resource Extension + +A Resource Extension processes specific files or directories. It is comprised of four function exports: + +| Method | Thread | Timing | +| ------------------- | ------------------ | ------------------------- | +| `handleFile()` | All worker threads | Executed on every restart | +| `handleDirectory()` | All worker threads | Executed on every restart | +| `setupFile()` | Main thread only | Once, at initial start | +| `setupDirectory()` | Main thread only | Once, at initial start | + +> **Important**: `harperdb restart` only restarts worker threads. Code in `setupFile()` and `setupDirectory()` runs only when Harper fully shuts down and starts again—not on `deploy` or `restart`. + +`handleFile()` and `setupFile()` have identical signatures. `handleDirectory()` and `setupDirectory()` have identical signatures. 
+ +### Resource Extension Configuration + +Resource Extensions can be configured with `files` and `urlPath` options in `config.yaml`: + +- `files` — `string | string[] | FilesOptionObject` _(required)_ — Glob pattern(s) determining which files and directories are resolved. Harper uses [fast-glob](https://github.com/mrmlnc/fast-glob) for matching. + - `source` — `string | string[]` _(required when object form)_ — Glob pattern string(s) + - `only` — `'all' | 'files' | 'directories'` _(optional)_ — Restrict matching to a single entry type. Defaults to `'all'` + - `ignore` — `string[]` _(optional)_ — Patterns to exclude from matches + +- `urlPath` — `string` _(optional)_ — Base URL path prepended to resolved entries + - Starting with `./` (e.g., `'./static/'`) prepends the component name to the URL path + - Value of `.` uses the component name as the base path + - `..` is invalid and causes an error + - Leading/trailing slashes are handled automatically (`/static/`, `static/`, and `/static` are equivalent) + +Examples: + +```yaml +# Serve HTML files from web/ at the /static/ URL path +static: + files: 'web/*.html' + urlPath: 'static' + +# Load all GraphQL schemas from src/schema/ +graphqlSchema: + files: 'src/schema/*.graphql' + +# Match files in web/, excluding web/images/ +static: + files: + source: 'web/**/*' + ignore: ['web/images'] + +# Match only files (not directories) +myExtension: + files: + source: 'dir/**/*' + only: 'files' +``` + +### Resource Extension API + +At minimum, a Resource Extension must implement one of the four methods. 
As a standalone extension, export them directly: + +```js +// ESM +export function handleFile() {} +export function setupDirectory() {} + +// CJS +function handleDirectory() {} +function setupFile() {} +module.exports = { handleDirectory, setupFile }; +``` + +When returned by a Protocol Extension, define them on the returned object: + +```js +export function start() { + return { + handleFile() {}, + }; +} +``` + +#### `handleFile(contents, urlPath, absolutePath, resources): void | Promise` + +#### `setupFile(contents, urlPath, absolutePath, resources): void | Promise` + +Process individual files. Can be async. + +Parameters: + +- `contents` — `Buffer` — File contents +- `urlPath` — `string` — Recommended URL path for the file +- `absolutePath` — `string` — Absolute filesystem path +- `resources` — `Object` — Currently loaded resources + +#### `handleDirectory(urlPath, absolutePath, resources): boolean | void | Promise` + +#### `setupDirectory(urlPath, absolutePath, resources): boolean | void | Promise` + +Process directories. Can be async. + +If the function returns a truthy value, the component loading sequence ends and no other entries in the directory are processed. + +Parameters: + +- `urlPath` — `string` — Recommended URL path for the directory +- `absolutePath` — `string` — Absolute filesystem path +- `resources` — `Object` — Currently loaded resources + +## Protocol Extension + +A Protocol Extension is a more advanced form of Resource Extension, primarily used for implementing higher-level protocols (e.g., building and running a Next.js project) or adding custom networking handlers. + +Protocol Extensions use the [`server`](../http/api.md) global API for custom networking. + +### Protocol Extension Configuration + +In addition to the `files`, `urlPath`, and `package` options, Protocol Extensions accept any additional configuration options defined under the extension name in `config.yaml`. 
These options are passed through to the `options` object of `start()` and `startOnMainThread()`. + +Many protocol extensions accept `port` and `securePort` options for configuring networking handlers. + +Example using `@harperdb/nextjs`: + +```yaml +'@harperdb/nextjs': + package: '@harperdb/nextjs' + files: './' + prebuilt: true + dev: false +``` + +### Protocol Extension API + +A Protocol Extension defines up to two methods: + +| Method | Thread | Timing | +| --------------------- | ------------------ | ------------------------- | +| `start()` | All worker threads | Executed on every restart | +| `startOnMainThread()` | Main thread only | Once, at initial start | + +Both methods receive the same `options` object and can return a Resource Extension (an object with any of the Resource Extension methods). + +#### `start(options): ResourceExtension | Promise` + +#### `startOnMainThread(options): ResourceExtension | Promise` + +Parameters: + +- `options` — `Object` — Extension configuration options from `config.yaml` + +Returns: An object implementing any of the Resource Extension methods + +## Version History + +- **v4.2.0** — Extension system introduced as part of the component architecture +- **v4.6.0** — New extension API with support for dynamic reloading; Plugin API introduced as the recommended alternative diff --git a/reference/components/javascript-environment.md b/reference/components/javascript-environment.md new file mode 100644 index 00000000..cd6f75b1 --- /dev/null +++ b/reference/components/javascript-environment.md @@ -0,0 +1,83 @@ +--- +title: JavaScript Environment +--- + + + +# JavaScript Environment + +Harper executes component JavaScript inside Node.js VM contexts — isolated module environments that share the same Node.js runtime but have their own global scope. This means each component runs in its own module context while still being able to access Harper's global APIs without any imports. 
+ +## Module Loading + +Harper supports both ESM and CommonJS module formats. + +All Harper globals are available directly as global variables in any component module. They are also accessible by importing from the `harperdb` package, which can provide better TypeScript typing: + +```javascript +import { tables, Resource } from 'harperdb'; +``` + +```javascript +const { tables, Resource } = require('harperdb'); +``` + +For components in their own directory, link the package to your local `harperdb` installation: + +```bash +npm link harperdb +``` + +All installed components have `harperdb` automatically linked. + +## Global APIs + +### `tables` + +An object whose properties are the tables in the default database (`data`). Each table defined in `schema.graphql` is accessible as a property and implements the Resource API. + +See [Database API](../database/api.md) for full reference. + +### `databases` + +An object containing all databases defined in Harper. Each database is an object of its tables — `databases.data` is always equivalent to `tables`. + +See [Database API](../database/api.md) for full reference. + +### `transaction(fn)` + +Executes a function inside a database transaction. Changes made within the function are committed atomically, or rolled back if an error is thrown. + +See [Transactions](../database/transaction.md) for full reference. + +### `createBlob(data, options?)` + +Added in: v4.5.0 + +Creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, etc.) in `Blob`-typed schema fields. + +See [Database API](../database/api.md) for full reference. + +### `Resource` + +The base class for all Harper resources, including tables and custom data sources. Extend `Resource` to implement custom data providers. + +See [Resource API](../resources/resource-api.md) for full reference. 
+ +### `server` + +Provides access to Harper's HTTP server middleware chain, WebSocket server, authentication helpers, resource registry, and cluster information. Also exposes `server.contentTypes` as an alias for the `contentTypes` global. + +See [HTTP API](../http/api.md) for full reference. + +### `contentTypes` + +A `Map` of MIME type strings to content type handler objects. Harper uses this map for content negotiation — deserializing incoming request bodies and serializing outgoing responses. You can register custom handlers to support additional formats. + +See [HTTP API](../http/api.md) for full reference. + +### `logger` + +Provides structured logging methods (`trace`, `debug`, `info`, `warn`, `error`, `fatal`, `notify`) that write to Harper's log file. Available without any imports in all component code. + +See [Logging API](../logging/api.md) for full reference. diff --git a/reference/components/overview.md b/reference/components/overview.md new file mode 100644 index 00000000..d41b7475 --- /dev/null +++ b/reference/components/overview.md @@ -0,0 +1,170 @@ +--- +title: Components +--- + + + + + + + +# Components + +**Components** are the high-level concept for modules that extend the Harper core platform with additional functionality. Components encapsulate both applications and extensions. + +> Harper is actively working to disambiguate component terminology. When you see "component" in the Operations API or CLI, it generally refers to an application. Documentation does its best to clarify which classification of component is meant wherever possible. + +## Concepts + +### Applications + +Added in: v4.2.0 + +**Applications** implement specific user-facing features or functionality. Applications are built on top of extensions and represent the end product that users interact with. For example, a Next.js application serving a web interface or an Apollo GraphQL server providing a GraphQL API are both applications. 
A collection of Harper Schemas and/or custom Resources is also an application. + +### Extensions + +Added in: v4.2.0 + +**Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality they implement. For example, the built-in `graphqlSchema` extension enables applications to define databases and tables using GraphQL schemas. The `@harperdb/nextjs` and `@harperdb/apollo` extensions provide building blocks for Next.js and Apollo applications respectively. + +Extensions can also depend on other extensions. For example, `@harperdb/apollo` depends on the built-in `graphqlSchema` extension to create a cache table for Apollo queries. + +### Plugins (Experimental) + +Added in: v4.6.0 (experimental) + +**Plugins** are a new iteration of the extension system introduced in v4.6. They are simultaneously a simplification and extensibility upgrade over extensions. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only export a single `handleApplication` method. + +Plugins are **experimental**. In time, extensions will be deprecated in favor of plugins, but both are currently supported. See the [Plugin API](./plugin-api.md) reference for complete documentation. + +### Built-In vs. Custom Components + +**Built-in** components are included with Harper by default and referenced directly by name. Examples include `graphqlSchema`, `rest`, `jsResource`, `static`, and `loadEnv`. + +**Custom** components use external references—npm packages, GitHub repositories, or local directories—and are typically included as `package.json` dependencies. + +Harper does not currently include built-in applications. All applications are custom.
+ +## Architecture + +The relationship between applications, extensions, and Harper core: + +``` +Applications + ├── Next.js App → @harperdb/nextjs extension + ├── Apollo App → @harperdb/apollo extension + └── Custom Resource → jsResource + graphqlSchema + rest extensions + +Extensions + ├── Custom: @harperdb/nextjs, @harperdb/apollo, @harperdb/astro + └── Built-In: graphqlSchema, jsResource, rest, static, loadEnv, ... + +Core + └── database, file-system, networking +``` + +## Configuration + +Harper components are configured with a `config.yaml` file in the root of the component module directory. This file is how a component configures other components it depends on. Each entry starts with a component name, with configuration values indented below: + +```yaml +componentName: + option-1: value + option-2: value +``` + +### Default Configuration + +Components without a `config.yaml` get this default configuration automatically: + +```yaml +rest: true +graphqlSchema: + files: '*.graphql' +roles: + files: 'roles.yaml' +jsResource: + files: 'resources.js' +fastifyRoutes: + files: 'routes/*.js' + urlPath: '.' +static: + files: 'web/**' +``` + +If a `config.yaml` is provided, it **replaces** the default config entirely (no merging). + +### Custom Component Configuration + +Any custom component must be configured with a `package` option for Harper to load it. The component name must match a `package.json` dependency: + +```json +{ + "dependencies": { + "@harperdb/nextjs": "1.0.0" + } +} +``` + +```yaml +'@harperdb/nextjs': + package: '@harperdb/nextjs' + files: './' +``` + +The `package` value supports any valid npm dependency specifier: npm packages, GitHub repos, tarballs, local paths, and URLs. This is because Harper generates a `package.json` from component configurations and uses `npm install` to resolve them. + +### Extension and Plugin Configuration + +Extensions require an `extensionModule` option pointing to the extension source. 
Plugins require a `pluginModule` option. See [Extension API](./extension-api.md) and [Plugin API](./plugin-api.md) for details. + +## Built-In Extensions Reference + +| Name | Description | +| ------------------------------------------------- | ------------------------------------------------- | +| [`dataLoader`](../database/data-loader.md) | Load data from JSON/YAML files into Harper tables | +| [`fastifyRoutes`](../fastify-routes/overview.md) | Define custom endpoints with Fastify | +| [`graphql`](../graphql-querying/overview.md) | Enable GraphQL querying (experimental) | +| [`graphqlSchema`](../database/schema.md) | Define table schemas with GraphQL syntax | +| [`jsResource`](../resources/overview.md) | Define custom JavaScript-based resources | +| [`loadEnv`](../environment-variables/overview.md) | Load environment variables from `.env` files | +| [`rest`](../rest/overview.md) | Enable automatic REST endpoint generation | +| [`roles`](../users-and-roles/overview.md) | Define role-based access control from YAML files | +| [`static`](../static-files/overview.md) | Serve static files via HTTP | + +## Known Custom Components + +### Applications + +- [`@harperdb/status-check`](https://github.com/HarperDB/status-check) +- [`@harperdb/prometheus-exporter`](https://github.com/HarperDB/prometheus-exporter) +- [`@harperdb/acl-connect`](https://github.com/HarperDB/acl-connect) + +### Extensions + +- [`@harperdb/nextjs`](https://github.com/HarperDB/nextjs) +- [`@harperdb/apollo`](https://github.com/HarperDB/apollo) +- [`@harperdb/astro`](https://github.com/HarperDB/astro) + +## Component Status Monitoring + +Added in: v4.7.0 + +Harper collects status from each component at load time and tracks any registered status change notifications. This provides visibility into the health and state of running components. 
+ +## Evolution History + +- **v4.1.0** — Custom functions with worker threads (predecessor to components) +- **v4.2.0** — Component architecture introduced; Resource API, REST interface, MQTT, WebSockets, SSE, configurable schemas +- **v4.3.0** — Component configuration improvements +- **v4.6.0** — New extension API with dynamic reloading; Plugin API introduced (experimental) +- **v4.7.0** — Component status monitoring; further plugin API improvements + +## See Also + +- [Applications](./applications.md) — Managing and deploying applications +- [Extension API](./extension-api.md) — Building custom extensions +- [Plugin API](./plugin-api.md) — Building plugins (experimental, recommended for new extensions) +- [Resource API](../resources/resource-api.md) — Resource class interface +- [Database Schema](../database/schema.md) — Defining schemas with graphqlSchema diff --git a/reference/components/plugin-api.md b/reference/components/plugin-api.md new file mode 100644 index 00000000..c92cf839 --- /dev/null +++ b/reference/components/plugin-api.md @@ -0,0 +1,423 @@ +--- +title: Plugin API +--- + + + + + +# Plugin API + +Added in: v4.6.0 (experimental) + +> The Plugin API is **experimental**. It is the recommended approach for building new extensions, and is intended to replace the [Extension API](./extension-api.md) in the future. Both systems are supported simultaneously. + +The Plugin API is a new iteration of the extension system that simplifies the interface. Instead of defining multiple methods (`start`, `startOnMainThread`, `handleFile`, `setupFile`, `handleDirectory`, `setupDirectory`), a plugin exports a single `handleApplication` method. 
+ +## Declaring a Plugin + +A plugin must specify a `pluginModule` option in `config.yaml` pointing to the plugin source: + +```yaml +pluginModule: plugin.js +``` + +For TypeScript or other compiled languages, point to the built output: + +```yaml +pluginModule: ./dist/index.js +``` + +It is recommended that plugins have a `package.json` with standard JavaScript package metadata (name, version, type, etc.). Plugins are standard JavaScript packages and can be published to npm, written in TypeScript, or export executables. + +## Configuration + +General plugin configuration options: + +- `files` — `string | string[] | FilesOptionObject` _(optional)_ — Glob pattern(s) for files and directories handled by the plugin's default `EntryHandler`. Pattern rules: + - Cannot contain `..` or start with `/` + - `.` or `./` is transformed to `**/*` automatically +- `urlPath` — `string` _(optional)_ — Base URL path prepended to resolved `files` entries. Cannot contain `..`. If starts with `./` or is `.`, the plugin name is automatically prepended +- `timeout` — `number` _(optional)_ — Timeout in milliseconds for plugin operations. Takes precedence over the plugin's `defaultTimeout` and the system default (30 seconds) + +### File Entries + +```yaml +# Serve files from web/ at /static/ +static: + files: 'web/**/*' + urlPath: '/static/' + +# Load only *.graphql files from src/schema/ +graphqlSchema: + files: 'src/schema/*.graphql' + +# Exclude a subdirectory +static: + files: + source: 'web/**/*' + ignore: 'web/images/**' +``` + +> Note: Unlike the Extension API, the Plugin API `files` object does **not** support an `only` field. Use `entryEvent.entryType` or `entryEvent.eventType` in your handler instead. + +### Timeouts + +The system default timeout is **30 seconds**. If `handleApplication()` does not complete within this time, the component loader throws an error to prevent indefinite hanging. 
+ +Plugins can override the system default by exporting a `defaultTimeout`: + +```typescript +export const defaultTimeout = 60_000; // 60 seconds +``` + +Users can override at the application level in `config.yaml`: + +```yaml +customPlugin: + package: '@org/custom-plugin' + files: 'foo.js' + timeout: 45_000 # 45 seconds +``` + +## TypeScript Support + +All classes and types are exported from the `harperdb` package: + +```typescript +import type { Scope, Config } from 'harperdb'; +``` + +## API Reference + +### Function: `handleApplication(scope: Scope): void | Promise` + +The only required export from a plugin module. The component loader executes it sequentially across all worker threads. It can be async and is awaited. + +Avoid event-loop-blocking operations within `handleApplication()`. + +```typescript +export function handleApplication(scope: Scope) { + // Use scope to access config, resources, server, etc. +} +``` + +Parameters: + +- `scope` — [`Scope`](#class-scope) — Access to the application's configuration, resources, and APIs + +The `handleApplication()` method cannot coexist with Extension API methods (`start`, `handleFile`, etc.). Defining both will throw an error. + +### Class: `Scope` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +The central object passed to `handleApplication()`. Provides access to configuration, file entries, server APIs, and logging. + +#### Events + +- **`'close'`** — Emitted after `scope.close()` is called +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — Emitted when the Scope is ready after loading the config file + +#### `scope.handleEntry([files][, handler])` {#scopehandleentry} + +Returns an [`EntryHandler`](#class-entryhandler) for watching and processing file system entries. 
+ +Overloads: + +- `scope.handleEntry()` — Returns the default `EntryHandler` based on `files`/`urlPath` in `config.yaml` +- `scope.handleEntry(handler)` — Returns default `EntryHandler`, registers `handler` for the `'all'` event +- `scope.handleEntry(files)` — Returns a new `EntryHandler` for custom `files` config +- `scope.handleEntry(files, handler)` — Returns a new `EntryHandler` with a custom `'all'` event handler + +Example: + +```js +export function handleApplication(scope) { + // Default handler with inline callback + scope.handleEntry((entry) => { + switch (entry.eventType) { + case 'add': + case 'change': + // handle file add/change + break; + case 'unlink': + // handle file deletion + break; + } + }); + + // Custom handler for specific files + const tsHandler = scope.handleEntry({ files: 'src/**/*.ts' }); +} +``` + +#### `scope.requestRestart()` + +Request a Harper restart. Does not restart immediately—indicates to the user that a restart is required. Called automatically if no `scope.options.on('change')` handler is defined or if a required handler is missing. + +#### `scope.resources` + +Returns: `Map` — Currently loaded [Resource](../resources/resource-api.md) instances. + +#### `scope.server` + +Returns: `server` — Reference to the [server](../http/api.md) global API. Use for registering HTTP middleware, custom networking, etc. + +#### `scope.options` + +Returns: [`OptionsWatcher`](#class-optionswatcher) — Access to the application's configuration options. Emits `'change'` events when the plugin's section of the config file is modified. + +#### `scope.logger` + +Returns: `logger` — Scoped logger instance. Recommended over the global `logger`. + +#### `scope.name` + +Returns: `string` — The plugin name as configured in `config.yaml`. + +#### `scope.directory` + +Returns: `string` — Root directory of the application component (where `config.yaml` lives). 
+ +#### `scope.close()` + +Closes all associated entry handlers and the `scope.options` instance, emits `'close'`, and removes all listeners. + +### Class: `OptionsWatcher` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +Provides reactive access to plugin configuration options, scoped to the specific plugin within the application's `config.yaml`. + +#### Events + +- **`'change'`** — `key: string[], value: ConfigValue, config: ConfigValue` — Emitted when a config option changes + - `key` — Option key split into parts (e.g., `foo.bar` → `['foo', 'bar']`) + - `value` — New value + - `config` — Entire plugin configuration object + +- **`'close'`** — Emitted when the watcher is closed +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — `config: ConfigValue | undefined` — Emitted on initial load and after `'remove'` recovery +- **`'remove'`** — Config was removed (file deleted, config key deleted, or parse failure) + +Example: + +```typescript +export function handleApplication(scope) { + scope.options.on('change', (key, value, config) => { + if (key[0] === 'files') { + scope.logger.info(`Files option changed to: ${value}`); + } + }); +} +``` + +#### `options.get(key: string[]): ConfigValue | undefined` + +Get the value at a specific config key path. + +#### `options.getAll(): ConfigValue | undefined` + +Get the entire plugin configuration object. + +#### `options.getRoot(): Config | undefined` + +Get the root `config.yaml` object (all plugins and options). + +#### `options.close()` + +Close the watcher, preventing further events. + +### Class: `EntryHandler` + +Extends [`EventEmitter`](https://nodejs.org/docs/latest/api/events.html#class-eventemitter) + +Created by [`scope.handleEntry()`](#scopehandleentry). Watches file system entries matching a `files` glob pattern and emits events as files are added, changed, or removed. 
+ +#### Events + +- **`'all'`** — `entry: FileEntryEvent | DirectoryEntryEvent` — Emitted for all entry events (add, change, unlink, addDir, unlinkDir). This is the event registered by the `scope.handleEntry(handler)` shorthand. +- **`'add'`** — `entry: AddFileEvent` — File created or first seen +- **`'addDir'`** — `entry: AddDirectoryEvent` — Directory created or first seen +- **`'change'`** — `entry: ChangeFileEvent` — File modified +- **`'close'`** — Entry handler closed +- **`'error'`** — `error: unknown` — An error occurred +- **`'ready'`** — Handler ready and watching +- **`'unlink'`** — `entry: UnlinkFileEvent` — File deleted +- **`'unlinkDir'`** — `entry: UnlinkDirectoryEvent` — Directory deleted + +Recommended pattern for handling all events: + +```js +scope.handleEntry((entry) => { + switch (entry.eventType) { + case 'add': + break; + case 'change': + break; + case 'unlink': + break; + case 'addDir': + break; + case 'unlinkDir': + break; + } +}); +``` + +#### `entryHandler.name` + +Returns: `string` — Plugin name. + +#### `entryHandler.directory` + +Returns: `string` — Application root directory. + +#### `entryHandler.close()` + +Closes the entry handler, removing all listeners. Can be restarted with `update()`. + +#### `entryHandler.update(config: FilesOption | FileAndURLPathConfig)` + +Update the handler to watch new entries. Closes and recreates the underlying watcher while preserving existing listeners. Returns a Promise that resolves when the updated handler is ready. 
+ +### Interfaces + +#### `FilesOption` + +`string | string[] | FilesOptionObject` + +#### `FilesOptionObject` + +- `source` — `string | string[]` _(required)_ — Glob pattern(s) +- `ignore` — `string | string[]` _(optional)_ — Patterns to exclude + +#### `FileAndURLPathConfig` + +- `files` — `FilesOption` _(required)_ +- `urlPath` — `string` _(optional)_ + +#### `BaseEntry` + +- `stats` — `fs.Stats | undefined` — File system stats (may be absent depending on event, entry type, and platform) +- `urlPath` — `string` — URL path of the entry, resolved from `files` + `urlPath` options +- `absolutePath` — `string` — Absolute filesystem path + +#### `FileEntry` + +Extends `BaseEntry` + +- `contents` — `Buffer` — File contents (automatically read) + +#### `EntryEvent` + +Extends `BaseEntry` + +- `eventType` — `string` — Type of event +- `entryType` — `'file' | 'directory'` — Entry type + +#### `AddFileEvent` + +- `eventType: 'add'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `ChangeFileEvent` + +- `eventType: 'change'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `UnlinkFileEvent` + +- `eventType: 'unlink'` +- `entryType: 'file'` +- Extends `EntryEvent`, `FileEntry` + +#### `FileEntryEvent` + +`AddFileEvent | ChangeFileEvent | UnlinkFileEvent` + +#### `AddDirectoryEvent` + +- `eventType: 'addDir'` +- `entryType: 'directory'` +- Extends `EntryEvent` + +#### `UnlinkDirectoryEvent` + +- `eventType: 'unlinkDir'` +- `entryType: 'directory'` +- Extends `EntryEvent` + +#### `DirectoryEntryEvent` + +`AddDirectoryEvent | UnlinkDirectoryEvent` + +#### `Config` + +`{ [key: string]: ConfigValue }` + +Parsed representation of `config.yaml`. + +#### `ConfigValue` + +`string | number | boolean | null | undefined | ConfigValue[] | Config` + +#### `onEntryEventHandler` + +`(entryEvent: FileEntryEvent | DirectoryEntryEvent): void` + +Function signature for the `'all'` event handler passed to `scope.handleEntry()`. 
+ +## Example: Static File Server Plugin + +A simplified form of the built-in `static` extension demonstrating key Plugin API patterns: + +```js +export function handleApplication(scope) { + const staticFiles = new Map(); + + // React to config changes + scope.options.on('change', (key, value, config) => { + if (key[0] === 'files' || key[0] === 'urlPath') { + staticFiles.clear(); + scope.logger.info(`Static files reset due to change in ${key.join('.')}`); + } + }); + + // Handle file entry events + scope.handleEntry((entry) => { + if (entry.entryType === 'directory') return; + + switch (entry.eventType) { + case 'add': + case 'change': + staticFiles.set(entry.urlPath, entry.contents); + break; + case 'unlink': + staticFiles.delete(entry.urlPath); + break; + } + }); + + // Register HTTP middleware + scope.server.http( + (req, next) => { + if (req.method !== 'GET') return next(req); + + const file = staticFiles.get(req.pathname); + return file ? { statusCode: 200, body: file } : { statusCode: 404, body: 'File not found' }; + }, + { runFirst: true } + ); +} +``` + +## Version History + +- **v4.6.0** — Plugin API introduced (experimental) +- **v4.7.0** — Further improvements to the Plugin API diff --git a/reference/configuration/operations.md b/reference/configuration/operations.md new file mode 100644 index 00000000..109bb8c5 --- /dev/null +++ b/reference/configuration/operations.md @@ -0,0 +1,141 @@ +--- +title: Configuration Operations +--- + + + +# Configuration Operations + +Operations API endpoints for reading and modifying Harper configuration. + +_All operations in this section are restricted to `super_user` roles._ + +For the full list of configurable options, see [Configuration Options](./options.md). + +--- + +## Set Configuration + +Modifies one or more Harper configuration parameters. 
**Requires a [restart](../operations-api/operations.md#restart) or [restart_service](../operations-api/operations.md#restart_service) to take effect.** + +`operation` _(required)_ — must be `set_configuration` + +Additional properties correspond to configuration keys in underscore-separated format (e.g. `logging_level` for `logging.level`, `clustering_enabled` for `clustering.enabled`). + +### Body + +```json +{ + "operation": "set_configuration", + "logging_level": "trace", + "clustering_enabled": true +} +``` + +### Response: 200 + +```json +{ + "message": "Configuration successfully set. You must restart HarperDB for new config settings to take effect." +} +``` + +--- + +## Get Configuration + +Returns the current Harper configuration. + +`operation` _(required)_ — must be `get_configuration` + +### Body + +```json +{ + "operation": "get_configuration" +} +``` + +### Response: 200 + +```json +{ + "http": { + "compressionThreshold": 1200, + "cors": false, + "corsAccessList": [null], + "keepAliveTimeout": 30000, + "port": 9926, + "securePort": null, + "timeout": 120000 + }, + "threads": 11, + "authentication": { + "cacheTTL": 30000, + "enableSessions": true, + "operationTokenTimeout": "1d", + "refreshTokenTimeout": "30d" + }, + "analytics": { + "aggregatePeriod": 60 + }, + "replication": { + "hostname": "node1", + "databases": "*", + "routes": null, + "url": "wss://127.0.0.1:9925" + }, + "componentsRoot": "/Users/hdb/components", + "localStudio": { + "enabled": false + }, + "logging": { + "auditAuthEvents": { + "logFailed": false, + "logSuccessful": false + }, + "auditLog": true, + "auditRetention": "3d", + "file": true, + "level": "error", + "root": "/Users/hdb/log", + "rotation": { + "enabled": false, + "compress": false, + "interval": null, + "maxSize": null, + "path": "/Users/hdb/log" + }, + "stdStreams": false + }, + "mqtt": { + "network": { + "port": 1883, + "securePort": 8883 + }, + "webSocket": true, + "requireAuthentication": true + }, + "operationsApi": 
{ + "network": { + "cors": true, + "corsAccessList": ["*"], + "domainSocket": "/Users/hdb/operations-server", + "port": 9925, + "securePort": null + } + }, + "rootPath": "/Users/hdb", + "storage": { + "writeAsync": false, + "caching": true, + "compression": false, + "noReadAhead": true, + "path": "/Users/hdb/database", + "prefetchWrites": true + }, + "tls": { + "privateKey": "/Users/hdb/keys/privateKey.pem" + } +} +``` diff --git a/reference/configuration/options.md b/reference/configuration/options.md new file mode 100644 index 00000000..687e4cf2 --- /dev/null +++ b/reference/configuration/options.md @@ -0,0 +1,319 @@ +--- +title: Configuration Options +--- + + + + + + + + +# Configuration Options + +Quick reference for all `harperdb-config.yaml` top-level sections. + +For how to apply configuration (YAML file, environment variables, CLI, Operations API), see [Configuration Overview](./overview.md). + +--- + +## `http` + +Configures the Harper component server (HTTP, REST API, WebSocket). See [HTTP Configuration](../http/configuration.md) for full details. 
+ +```yaml +http: + port: 9926 + securePort: 4443 + cors: true + timeout: 120000 + mtls: false + logging: + level: info + path: ~/hdb/log/http.log +``` + +- `sessionAffinity` — Route requests from same client to same worker thread (`ip` or header name) +- `compressionThreshold` — Response size threshold for Brotli compression; _Default_: `1200` (bytes) +- `cors` — Enable CORS; _Default_: `true` +- `corsAccessList` — Allowed domains for CORS requests +- `corsAccessControlAllowHeaders` — `Access-Control-Allow-Headers` value for OPTIONS preflight +- `headersTimeout` — Max wait for complete HTTP headers (ms); _Default_: `60000` +- `maxHeaderSize` — Max HTTP header size (bytes); _Default_: `16394` +- `requestQueueLimit` — Max estimated request queue time (ms) before 503; _Default_: `20000` +- `keepAliveTimeout` — Inactivity before closing keep-alive connection (ms); _Default_: `30000` +- `port` — HTTP port; _Default_: `9926` +- `securePort` — HTTPS port; requires [TLS configuration](../http/tls.md); _Default_: `null` +- `http2` — Enable HTTP/2; _Default_: `false` (Added in: v4.5.0) +- `timeout` — Request timeout (ms); _Default_: `120000` +- `mtls` — Enable [mTLS authentication](../security/mtls-authentication.md) for incoming connections; sub-options: `user`, `required`, `certificateVerification` (see [Certificate Verification](../security/certificate-verification.md)) +- `logging` — HTTP request logging (disabled by default, Added in: v4.6.0); sub-options: `level`, `path`, `timing`, `headers`, `id`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `threads` + +Worker thread pool configuration. 
+ +```yaml +threads: + count: 11 + maxHeapMemory: 300 +``` + +- `count` — Number of worker threads; _Default_: CPU count minus one +- `maxHeapMemory` — Heap limit per thread (MB) +- `heapSnapshotNearLimit` — Take heap snapshot when approaching limit +- `debug` — Enable debugging; sub-options: `port`, `startingPort`, `host`, `waitForDebugger` + +--- + +## `authentication` + +Authentication and session configuration. Added in: v4.1.0; `enableSessions` added in v4.2.0. See [Authentication Configuration](../security/configuration.md). + +```yaml +authentication: + authorizeLocal: true + cacheTTL: 30000 + enableSessions: true + operationTokenTimeout: 1d + refreshTokenTimeout: 30d +``` + +- `authorizeLocal` — Auto-authorize loopback requests as superuser; _Default_: `true` +- `cacheTTL` — Session cache duration (ms); _Default_: `30000` +- `enableSessions` — Cookie-based sessions; _Default_: `true` +- `operationTokenTimeout` — Access token lifetime; _Default_: `1d` +- `refreshTokenTimeout` — Refresh token lifetime; _Default_: `30d` +- `logging` — Authentication event logging (Added in: v4.6.0); sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `operationsApi` + +Harper Operations API endpoint configuration. See [Operations API Overview](../operations-api/overview.md). 
+ +```yaml +operationsApi: + network: + port: 9925 + cors: true + tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +- `network.cors` / `network.corsAccessList` — CORS settings +- `network.domainSocket` — Unix socket path for CLI communication; _Default_: `/hdb/operations-server` +- `network.headersTimeout` / `network.keepAliveTimeout` / `network.timeout` — Timeout settings (ms) +- `network.port` — Operations API port; _Default_: `9925` +- `network.securePort` — HTTPS port; _Default_: `null` +- `tls` — TLS override for the Operations API; sub-options: `certificate`, `certificateAuthority`, `privateKey`. See [`tls`](#tls) + +--- + +## `tls` + +Global TLS configuration for HTTPS and TLS sockets (used by HTTP and MQTT). Can be a single object or an array for SNI. See [TLS](../http/tls.md) and [Certificate Management](../security/certificate-management.md). + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +- `certificate` — Path to TLS certificate; _Default_: `/keys/certificate.pem` +- `certificateAuthority` — Path to CA file; _Default_: `/keys/ca.pem` +- `privateKey` — Path to private key; _Default_: `/keys/privateKey.pem` +- `ciphers` — Allowed TLS cipher suites + +--- + +## `mqtt` + +MQTT protocol configuration. Added in: v4.2.0. See [MQTT Configuration](../mqtt/configuration.md). 
+ +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + webSocket: true + requireAuthentication: true +``` + +- `network.port` — Insecure MQTT port; _Default_: `1883` +- `network.securePort` — Secure MQTT port; _Default_: `8883` +- `network.mtls` — Enable [mTLS](../security/mtls-authentication.md) for MQTT connections; sub-options: `user`, `required`, `certificateAuthority`, `certificateVerification` +- `webSocket` — Enable MQTT over WebSocket on HTTP port; _Default_: `true` +- `requireAuthentication` — Require credentials or mTLS; _Default_: `true` +- `logging` — MQTT event logging (Added in: v4.6.0); sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `logging` + +Application logging. Added in: v4.1.0; per-component logging added in v4.6.0. See [Logging Configuration](../logging/configuration.md). + +```yaml +logging: + level: warn + root: ~/hdb/log + stdStreams: false + auditLog: false + rotation: + interval: 1D + maxSize: 100M +``` + +- `level` — Log verbosity (`trace` → `debug` → `info` → `warn` → `error` → `fatal` → `notify`); _Default_: `warn` +- `file` — Write to file; _Default_: `true` +- `root` — Log directory; _Default_: `/log` +- `path` — Explicit log file path (overrides `root`) +- `stdStreams` — Write to stdout/stderr; _Default_: `false` +- `console` — Include `console.*` output; _Default_: `true` +- `auditLog` — Enable table transaction audit logging; _Default_: `false` +- `auditRetention` — Audit log retention duration; _Default_: `3d` +- `external` — Logging for components using the logger API; sub-options: `level`, `path` +- `rotation.enabled` / `rotation.compress` / `rotation.interval` / `rotation.maxSize` / `rotation.path` — Log file rotation (activates when `interval` or `maxSize` is set) +- `auditAuthEvents.logFailed` / `auditAuthEvents.logSuccessful` — Log failed/successful authentication events; _Default_: `false` + +--- + +## `replication` + +Native WebSocket-based 
replication (Plexus). Added in: v4.4.0. See [Replication](../replication/overview.md) and [Clustering](../replication/clustering.md). + +```yaml +replication: + hostname: server-one + url: wss://server-one:9933 + databases: '*' + routes: + - wss://server-two:9933 +``` + +- `hostname` — This instance's hostname within the cluster +- `url` — WebSocket URL peers use to connect to this instance +- `databases` — Databases to replicate; _Default_: `"*"` (all). Each entry supports `name` and `sharded` +- `routes` — Peer nodes; URL strings or `{hostname, port, startTime, revokedCertificates}` objects +- `port` — Replication port +- `securePort` — Secure replication port; _Default_: `9933` (changed from `9925` in v4.5.0) +- `enableRootCAs` — Verify against Node.js Mozilla CA store; _Default_: `true` +- `blobTimeout` — Blob transfer timeout (ms); _Default_: `120000` +- `failOver` — Failover to alternate node if peer unreachable; _Default_: `true` +- `shard` — Shard ID for traffic routing; see [Sharding](../replication/sharding.md) +- `mtls.certificateVerification` — Certificate revocation checking (CRL/OCSP) for replication connections; see [Certificate Verification](../security/certificate-verification.md) +- `logging` — Replication event logging; sub-options: `path`, `level`, `tag`, `stdStreams`. See [Logging Configuration](../logging/configuration.md) + +--- + +## `storage` + +Database storage configuration. See [Database Overview](../database/overview.md) and [Compaction](../database/compaction.md). + +```yaml +storage: + path: ~/hdb/database + caching: true + compression: true + compactOnStart: false +``` + +- `writeAsync` — Disable disk sync for higher throughput (**disables durability guarantees**); _Default_: `false` +- `caching` — In-memory record caching; _Default_: `true` +- `compression` — LZ4 record compression; _Default_: `true` (enabled by default since v4.3.0). 
Sub-options: `dictionary`, `threshold` +- `compactOnStart` — Compact all non-system databases on startup; _Default_: `false` (Added in: v4.3.0) +- `compactOnStartKeepBackup` — Retain compaction backups; _Default_: `false` +- `maxTransactionQueueTime` — Max write queue time before 503; _Default_: `45s` +- `noReadAhead` — Advise OS against read-ahead; _Default_: `false` +- `prefetchWrites` — Prefetch before write transactions; _Default_: `true` +- `path` — Database files directory; _Default_: `/database` +- `blobPaths` — Blob storage directory or directories; _Default_: `/blobs` (Added in: v4.5.0) +- `pageSize` — Database page size (bytes); _Default_: OS default +- `reclamation.threshold` / `reclamation.interval` / `reclamation.evictionFactor` — Background storage reclamation settings (Added in: v4.5.0) + +--- + +## `databases` + +Per-database and per-table file path overrides. Must be set before the database/table is created. See [Database Overview](../database/overview.md). + +```yaml +databases: + myDatabase: + path: /data/myDatabase + auditPath: /data/myDatabase-audit + tables: + myTable: + path: /data/myTable +``` + +- `<database>.path` — Database files directory +- `<database>.auditPath` — Audit log directory for this database +- `<database>.tables.<table>.path` — Table files directory + +--- + +## `analytics` + +Analytics aggregation configuration. See [Analytics Overview](../analytics/overview.md). + +```yaml +analytics: + aggregatePeriod: 60 + replicate: false +``` + +- `aggregatePeriod` — Aggregation interval (seconds); _Default_: `60` (Added in: v4.5.0) +- `replicate` — Replicate analytics data across cluster; _Default_: `false` + +--- + +## `localStudio` + +Local Harper Studio GUI. See [Studio](../studio/overview.md). + +```yaml +localStudio: + enabled: true +``` + +- `enabled` — Enable local Studio at `http://localhost:<port>`; _Default_: `false` + +--- + +## `componentsRoot` + +Path to local component files. Added in: v4.2.0 (previously `customFunctionsRoot`). 
See [Components](../components/overview.md). + +```yaml +componentsRoot: ~/hdb/components +``` + +--- + +## `rootPath` + +Root directory for all Harper persistent data, config, logs, and components. + +```yaml +rootPath: /var/lib/harper +``` + +--- + +## Component Configuration + +Installed components are configured directly at the root of `harperdb-config.yaml` using the component name as the key — not nested under a `components:` section. See [Components](../components/overview.md). + +```yaml +my-component: + package: 'HarperDB-Add-Ons/my-component' + port: 4321 +``` + +- `<component-name>.package` — NPM package name, GitHub repo (`user/repo`), or local path +- `<component-name>.port` — Port for the component; _Default_: value of `http.port` diff --git a/reference/configuration/overview.md b/reference/configuration/overview.md new file mode 100644 index 00000000..747e681c --- /dev/null +++ b/reference/configuration/overview.md @@ -0,0 +1,209 @@ +--- +title: Configuration Overview +--- + + + + +# Configuration + +Harper is configured through a [YAML](https://yaml.org/) file called `harperdb-config.yaml` located in the Harper root directory. By default the root directory is a folder named `hdb` in the home directory of the current user. + +Some configuration values are pre-populated in the config file on install, regardless of whether they are used. + +For a complete reference of all available configuration options, see [Configuration Options](./options.md). + +--- + +## The Configuration File + +To change a configuration value, edit `harperdb-config.yaml` and save. **Harper must be restarted for changes to take effect.** + +Configuration keys use camelCase (e.g. `operationsApi`). Nested keys use dot notation conceptually (e.g. `operationsApi.network.port`). + +--- + +## Setting Configuration Values + +All configuration values can be set through four mechanisms: + +### 1. 
YAML File (direct edit) + +Edit `harperdb-config.yaml` directly: + +```yaml +http: + port: 9926 +logging: + level: warn +``` + +### 2. Environment Variables + +Map YAML keys to `SCREAMING_SNAKE_CASE`. Use underscores for nesting. Keys are case-insensitive. + +Examples: + +- `http.port` → `HTTP_PORT=9926` +- `logging.rotation.enabled` → `LOGGING_ROTATION_ENABLED=false` +- `operationsApi.network.port` → `OPERATIONSAPI_NETWORK_PORT=9925` + +```bash +HTTP_PORT=9926 harperdb +``` + +> **Note:** Component configuration cannot be set via environment variables or CLI arguments. + +### 3. CLI Arguments + +Same naming convention as environment variables, prefixed with `--`: + +```bash +harperdb --HTTP_PORT 9926 --LOGGING_LEVEL warn +``` + +### 4. Operations API + +Use `set_configuration` with underscore-separated key paths: + +```json +{ + "operation": "set_configuration", + "http_port": 9926, + "logging_level": "warn" +} +``` + +See [Configuration Operations](./operations.md) for the full `set_configuration` and `get_configuration` API reference. + +--- + +## Custom Config File Path + +To specify a custom config file location at install time, use the `HDB_CONFIG` variable: + +```bash +# Use a custom config file path +HDB_CONFIG=/path/to/custom-config.yaml harperdb + +# Install over an existing config +HDB_CONFIG=/existing/rootpath/harperdb-config.yaml harperdb +``` + +--- + +## Environment Variable-Based Configuration + +Added in: v4.7.2 + +Harper provides two special environment variables for managing configuration across deployments: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. Both accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`. + +```bash +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' +export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' +``` + +### HARPER_DEFAULT_CONFIG + +Provides default configuration values while respecting user modifications. 
Ideal for supplying sensible defaults without preventing administrators from customizing their instances. + +**At installation time:** + +- Overrides template default values +- Respects values set by `HARPER_SET_CONFIG` +- Respects values from existing config files (when using `HDB_CONFIG`) + +**At runtime:** + +- Only updates values it originally set +- Detects and respects manual user edits to the config file +- When a key is removed from the variable, the original value is restored + +**Example:** + +```bash +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080},"logging":{"level":"info"}}' +harperdb + +# If an administrator manually changes the port to 9000, Harper will +# detect this edit and respect it on subsequent restarts. + +# If http.port is removed from HARPER_DEFAULT_CONFIG later, +# the port reverts to the original template default (9926). +``` + +### HARPER_SET_CONFIG + +Forces configuration values that cannot be overridden by user edits. Designed for security policies, compliance requirements, or critical operational settings. + +**At runtime:** + +- Always overrides all other configuration sources +- Takes precedence over user edits, file values, and `HARPER_DEFAULT_CONFIG` +- When a key is removed from the variable, it is deleted from the config (not restored) + +**Example:** + +```bash +export HARPER_SET_CONFIG='{"authentication":{"enabled":true},"logging":{"level":"error","stdStreams":true}}' +harperdb + +# Any change to these values in harperdb-config.yaml will be +# overridden on the next restart. +``` + +### Combining Both Variables + +```bash +# Provide sensible defaults (can be overridden by admins) +export HARPER_DEFAULT_CONFIG='{"http":{"port":8080,"cors":true},"logging":{"level":"info"}}' + +# Enforce critical settings (cannot be changed) +export HARPER_SET_CONFIG='{"authentication":{"enabled":true}}' +``` + +### Configuration Precedence + +From highest to lowest: + +1. **`HARPER_SET_CONFIG`** — Always wins +2. 
**User manual edits** — Detected via drift detection +3. **`HARPER_DEFAULT_CONFIG`** — Applied if no user edits detected +4. **File defaults** — Original template values + +### State Tracking + +Harper maintains a state file at `{rootPath}/backup/.harper-config-state.json` to track the source of each configuration value. This enables: + +- **Drift detection**: Identifying when users manually edit values set by `HARPER_DEFAULT_CONFIG` +- **Restoration**: Restoring original values when keys are removed from `HARPER_DEFAULT_CONFIG` +- **Conflict resolution**: Determining which source should take precedence + +### Format Reference + +The JSON structure mirrors the YAML config file: + +**YAML:** + +```yaml +http: + port: 8080 + cors: true +logging: + level: info + rotation: + enabled: true +``` + +**Environment variable (JSON):** + +```json +{ "http": { "port": 8080, "cors": true }, "logging": { "level": "info", "rotation": { "enabled": true } } } +``` + +### Important Notes + +- Both variables must contain valid JSON matching the structure of `harperdb-config.yaml` +- Invalid values are caught by Harper's configuration validator at startup +- Changes to these variables require a Harper restart to take effect +- The state file is per-instance (stored in the root path) diff --git a/reference/database/api.md b/reference/database/api.md new file mode 100644 index 00000000..bb8c225d --- /dev/null +++ b/reference/database/api.md @@ -0,0 +1,243 @@ +--- +title: API +--- + + + + + +# API + +Harper exposes a set of global variables and functions that JavaScript code (in components, applications, and plugins) can use to interact with the database system. + +## `tables` + +`tables` is an object whose properties are the tables in the default database (`data`). Each table defined in your `schema.graphql` file is available as a property, and the value is the table class that implements the [Resource API](../resources/resource-api.md). 
+ +```graphql +# schema.graphql +type Product @table { + id: Long @primaryKey + name: String + price: Float +} +``` + +```javascript +const { Product } = tables; +// same as: databases.data.Product +``` + +### Example + +```javascript +// Create a new record (id auto-generated) +const created = await Product.create({ name: 'Shirt', price: 9.5 }); + +// Modify the record +await Product.patch(created.id, { price: Math.round(created.price * 0.8 * 100) / 100 }); + +// Retrieve by primary key +const record = await Product.get(created.id); + +// Query with conditions +const query = { + conditions: [{ attribute: 'price', comparator: 'less_than', value: 8.0 }], +}; +for await (const record of Product.search(query)) { + // ... +} +``` + +For the full set of methods available on table classes, see the [Resource API](../resources/resource-api.md). + +## `databases` + +`databases` is an object whose properties are Harper databases. Each database contains its tables as properties, the same way `tables` does for the default database. In fact, `databases.data === tables` is always true. + +Use `databases` when you need to access tables from a non-default database. + +### Example + +```javascript +const { Product } = databases.data; // default database +const Events = databases.analytics.Events; // another database + +// Create an event record +const event = await Events.create({ eventType: 'login', timestamp: Date.now() }); + +// Query events +for await (const e of Events.search({ conditions: [{ attribute: 'eventType', value: 'login' }] })) { + // handle each event +} +``` + +To define tables in a non-default database, use the `database` argument on the `@table` directive in your schema: + +```graphql +type Events @table(database: "analytics") { + id: Long @primaryKey + eventType: String @indexed +} +``` + +See [Schema](./schema.md) for full schema definition syntax. 
+ +## `transaction(context?, callback)` + +`transaction` executes a callback within a database transaction and returns a promise that resolves when the transaction commits. The callback may be async. + +```typescript +transaction(context?: object, callback: (txn: Transaction) => any | Promise): Promise +``` + +For most operations — HTTP request handlers, for example — Harper automatically starts a transaction. Use `transaction()` explicitly when your code runs outside of a natural transaction context, such as in timers or background jobs. + +### Basic Usage + +```javascript +import { tables } from 'harperdb'; +const { MyTable } = tables; + +if (isMainThread) { + setInterval(async () => { + let data = await (await fetch('https://example.com/data')).json(); + transaction(async (txn) => { + for (let item of data) { + await MyTable.put(item, txn); + } + }); + }, 3600000); // every hour +} +``` + +### Nesting + +If `transaction()` is called with a context that already has an active transaction, it reuses that transaction, executes the callback immediately, and returns. This makes `transaction()` safe to call defensively to ensure a transaction is always active. + +### Transaction Object + +The callback receives a `txn` object with the following members: + +| Member | Type | Description | +| --------------------- | --------------- | ------------------------------------------------------ | +| `commit()` | `() => Promise` | Commits the current transaction | +| `abort()` | `() => void` | Aborts the transaction and resets it | +| `resetReadSnapshot()` | `() => void` | Resets the read snapshot to the latest committed state | +| `timestamp` | `number` | Timestamp associated with the current transaction | + +On normal callback completion the transaction is committed automatically. If the callback throws, the transaction is aborted. + +### Transaction Scope and Atomicity + +Transactions span a single database. 
All tables within the same database share a single transactional context: reads return a consistent snapshot, and writes across multiple tables are committed atomically. If code accesses tables in different databases, each database gets its own transaction with no cross-database atomicity guarantee. + +For deeper background on Harper's transaction model, see [Storage Algorithm](./storage-algorithm.md). + +## `createBlob(data, options?)` + +Added in: v4.5.0 + +`createBlob` creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, large HTML, etc.) in a `Blob`-typed schema field. + +```typescript +createBlob(data: Buffer | Uint8Array | ReadableStream | string, options?: BlobOptions): Blob +``` + +Harper's `Blob` extends the [Web API `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so standard methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`, `.bytes()`) are all available. Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to fit in memory. + +### Basic Usage + +Declare a blob field in your schema (see [Schema — Blob Type](./schema.md#blob-type)): + +```graphql +type MyTable @table { + id: Any! 
@primaryKey
+	data: Blob
+}
+```
+
+Create and store a blob:
+
+```javascript
+let blob = createBlob(largeBuffer);
+await MyTable.put({ id: 'my-record', data: blob });
+```
+
+Retrieve blob data using standard `Blob` methods:
+
+```javascript
+let record = await MyTable.get('my-record');
+let buffer = await record.data.bytes(); // Uint8Array
+let text = await record.data.text(); // string
+let stream = record.data.stream(); // ReadableStream
+```
+
+### Streaming
+
+`createBlob` supports streaming data in as data is streamed out — useful for large media where low-latency transmission from origin is critical:
+
+```javascript
+let blob = createBlob(incomingStream);
+// blob exists, but data is still streaming to storage
+await MyTable.put({ id: 'my-record', data: blob });
+
+let record = await MyTable.get('my-record');
+// blob data is accessible as it arrives
+let outgoingStream = record.data.stream();
+```
+
+Because blobs can be referenced before they are fully written, they are **not** ACID-compliant by default. Use `saveBeforeCommit: true` to wait for the full write before committing:
+
+```javascript
+let blob = createBlob(stream, { saveBeforeCommit: true });
+await MyTable.put({ id: 'my-record', data: blob });
+// put() resolves only after blob is fully written and record is committed
+```
+
+### `BlobOptions`
+
+| Option             | Type      | Default | Description                                                              |
+| ------------------ | --------- | ------- | ------------------------------------------------------------------------ |
+| `saveBeforeCommit` | `boolean` | `false` | Wait for the blob to be fully written before committing the transaction |
+
+### Error Handling
+
+Blobs written from a stream can fail mid-stream after the record is committed. 
Register an error handler to respond to interrupted writes: + +```javascript +export class MyEndpoint extends MyTable { + async get(target) { + const record = await super.get(target); + let blob = record.data; + blob.on('error', () => { + MyTable.invalidate(target); + }); + return { status: 200, headers: {}, body: blob }; + } +} +``` + +### `size` Property + +Blobs created from a stream may not have `size` available immediately. Listen for the `size` event if you need it: + +```javascript +let blob = record.data; +if (blob.size === undefined) { + blob.on('size', (size) => { + // called once size is determined + }); +} +``` + +### Blob Coercion + +When a field is typed as `Blob` in the schema, any string or buffer assigned via `put`, `patch`, or `publish` is automatically coerced to a `Blob`. This means plain JSON HTTP bodies and MQTT messages work without manual `createBlob()` calls in most cases. + +## Related Documentation + +- [Schema](./schema.md) — Defining tables and blob fields +- [Resource API](../resources/resource-api.md) — Full table class method reference +- [Transaction Logging](./transaction.md) — Audit log and transaction log for data change history +- [Configuration](../configuration/options.md) — Blob storage path configuration diff --git a/reference/database/compaction.md b/reference/database/compaction.md new file mode 100644 index 00000000..a4ede5f2 --- /dev/null +++ b/reference/database/compaction.md @@ -0,0 +1,71 @@ +--- +title: Compaction +--- + + + + +# Compaction + +Added in: v4.3.0 + +Database files grow over time as records are inserted, updated, and deleted. Deleted records and updated values leave behind free space (fragmentation) in the database file, which can increase file size and potentially affect performance. Compaction eliminates this free space, creating a smaller, contiguous database file. + +> **Note:** Compaction does not compress your data. It removes internal fragmentation to make the file smaller. 
To enable compression on a database, use compaction to copy the database with updated storage configuration applied.
+
+Compaction is also the mechanism to apply storage configuration changes (such as enabling compression) to existing databases, since some storage settings cannot be changed in-place.
+
+## Copy Compaction
+
+Creates a compacted copy of a database file. The original database is left unchanged.
+
+> **Recommendation:** Stop Harper before performing copy compaction to prevent any record loss during the copy operation.
+
+Run using the [CLI](../cli/commands.md):
+
+```bash
+harperdb copy-db <source-database> <target-file-path>
+```
+
+The `source-database` is the database name (not a file path). The target is the full file path where the compacted copy will be written.
+
+To replace the original database with the compacted copy, move or rename the output file to the original database path after Harper is stopped.
+
+**Example — compact the default `data` database:**
+
+```bash
+harperdb copy-db data /home/user/hdb/database/copy.mdb
+```
+
+## Compact on Start
+
+Automatically compacts all non-system databases when Harper starts. Harper will not start until compaction is complete. Under the hood, it loops through all user databases, creates a backup of each, compacts it, replaces the original with the compacted copy, and removes the backup.
+
+Configure in `harperdb-config.yaml`:
+
+```yaml
+storage:
+  compactOnStart: true
+  compactOnStartKeepBackup: false
+```
+
+Using CLI environment variables:
+
+```bash
+STORAGE_COMPACTONSTART=true STORAGE_COMPACTONSTARTKEEPBACKUP=true harperdb
+```
+
+### Options
+
+| Option                     | Type    | Default | Description                                                                       |
+| -------------------------- | ------- | ------- | --------------------------------------------------------------------------------- |
+| `compactOnStart`           | Boolean | `false` | Compact all databases at startup. Automatically reset to `false` after running. 
| +| `compactOnStartKeepBackup` | Boolean | `false` | Retain the backup copy created during compact on start | + +> **Note:** `compactOnStart` is automatically set back to `false` after it runs, so compaction only happens on the next start if you explicitly re-enable it. + +## Related Documentation + +- [Storage Algorithm](./storage-algorithm.md) — How Harper stores data using LMDB +- [CLI Commands](../cli/commands.md) — `copy-db` CLI command reference +- [Configuration](../configuration/options.md) — Full storage configuration options including compression settings diff --git a/reference/database/data-loader.md b/reference/database/data-loader.md new file mode 100644 index 00000000..962b3706 --- /dev/null +++ b/reference/database/data-loader.md @@ -0,0 +1,216 @@ +--- +title: Data Loader +--- + + + + +# Data Loader + +Added in: v4.6.0 + +The Data Loader is a built-in component that loads data from JSON or YAML files into Harper tables as part of component deployment. It is designed for seeding tables with initial records — configuration data, reference data, default users, or other records that should exist when a component is first deployed or updated. + +## Configuration + +In your component's `config.yaml`, use the `dataLoader` key to specify the data files to load: + +```yaml +dataLoader: + files: 'data/*.json' +``` + +`dataLoader` is an [Extension](../components/extension-api.md) and supports the standard `files` configuration option, including glob patterns. + +## Data File Format + +Each data file loads records into a single table. The file specifies the target database, table, and an array of records. 
+ +### JSON Example + +```json +{ + "database": "myapp", + "table": "users", + "records": [ + { + "id": 1, + "username": "admin", + "email": "admin@example.com", + "role": "administrator" + }, + { + "id": 2, + "username": "user1", + "email": "user1@example.com", + "role": "standard" + } + ] +} +``` + +### YAML Example + +```yaml +database: myapp +table: settings +records: + - id: 1 + setting_name: app_name + setting_value: My Application + - id: 2 + setting_name: version + setting_value: '1.0.0' +``` + +One table per file. To load data into multiple tables, create a separate file for each table. + +## File Patterns + +The `files` option accepts a single path, a list of paths, or a glob pattern: + +```yaml +# Single file +dataLoader: + files: 'data/seed-data.json' + +# Multiple specific files +dataLoader: + files: + - 'data/users.json' + - 'data/settings.yaml' + - 'data/initial-products.json' + +# Glob pattern +dataLoader: + files: 'data/**/*.{json,yaml,yml}' +``` + +## Loading Behavior + +The Data Loader runs on every full system start and every component deployment — this includes fresh installs, restarts of the Harper process or threads, and redeployments of the component. + +Because the Data Loader runs on every startup and deployment, change detection is central to how it works safely. On each run: + +1. All specified data files are read (JSON or YAML) +2. Each file is validated to reference a single table +3. Records are inserted or updated based on content hash comparison: + - New records are inserted if they don't exist + - Existing records are updated only if the data file content has changed + - Records created outside the Data Loader (via Operations API, REST, etc.) are never overwritten + - Records modified by users after being loaded are preserved and not overwritten + - Extra fields added by users to data-loaded records are preserved during updates +4. 
SHA-256 content hashes are stored in the [`hdb_dataloader_hash`](./system-tables.md#hdb_dataloader_hash) system table to track which records have been loaded and detect changes + +### Change Detection + +| Scenario | Behavior | +| -------------------------------------------------- | ------------------------------------------------ | +| New record | Inserted; content hash stored | +| Unchanged record | Skipped (no writes) | +| Changed data file | Updated via `patch`, preserving any extra fields | +| Record created by user (not data loader) | Never overwritten | +| Record modified by user after load | Preserved, not overwritten | +| Extra fields added by user to a data-loaded record | Preserved during updates | + +This design makes data files safe to redeploy repeatedly — across deployments, node scaling, and system restarts — without losing manual modifications or causing unnecessary writes. + +## Best Practices + +**Define schemas first.** While the Data Loader can infer schemas from the records it loads, it is strongly recommended to define table schemas explicitly using the [graphqlSchema component](./schema.md) before loading data. This ensures proper types, constraints, and relationships. + +**One table per file.** Each data file must target a single table. Organize files accordingly. + +**Idempotent data.** Design files to be safe to load multiple times without creating duplicate or conflicting records. + +**Version control.** Include data files in version control for consistency across deployments and environments. + +**Environment-specific data.** Consider using different data files for different environments (development, staging, production) to avoid loading inappropriate records. + +**Validate before deploying.** Ensure data files are valid JSON or YAML and match your table schemas before deployment to catch type mismatches early. + +**No sensitive data.** Do not include passwords, API keys, or secrets directly in data files. 
Use environment variables or secure configuration management instead. + +## Example Component Structure + +A common production use case is shipping reference data — lookup tables like countries and regions — as part of a component. The records are version-controlled alongside the code, consistent across every environment, and the data loader keeps them in sync on every deployment without touching any user-modified fields. + +``` +my-component/ +├── config.yaml +├── schemas.graphql +├── roles.yaml +└── data/ + ├── countries.json # ISO country codes — reference data, ships with component + └── regions.json # region/subdivision codes +``` + +**`config.yaml`**: + +```yaml +graphqlSchema: + files: 'schemas.graphql' + +roles: + files: 'roles.yaml' + +dataLoader: + files: 'data/*.json' + +rest: true +``` + +**`schemas.graphql`**: + +```graphql +type Country @table(database: "myapp") @export { + id: String @primaryKey # ISO 3166-1 alpha-2, e.g. "US" + name: String @indexed + region: String @indexed +} + +type Region @table(database: "myapp") @export { + id: String @primaryKey # ISO 3166-2, e.g. "US-CA" + name: String @indexed + countryId: String @indexed + country: Country @relationship(from: countryId) +} +``` + +**`data/countries.json`**: + +```json +{ + "database": "myapp", + "table": "Country", + "records": [ + { "id": "US", "name": "United States", "region": "Americas" }, + { "id": "GB", "name": "United Kingdom", "region": "Europe" }, + { "id": "DE", "name": "Germany", "region": "Europe" } + // ... all ~250 ISO countries + ] +} +``` + +**`data/regions.json`**: + +```json +{ + "database": "myapp", + "table": "Region", + "records": [ + { "id": "US-CA", "name": "California", "countryId": "US" }, + { "id": "US-NY", "name": "New York", "countryId": "US" }, + { "id": "GB-ENG", "name": "England", "countryId": "GB" } + // ... 
+ ] +} +``` + +Because the data loader uses content hashing, adding new countries or correcting a name in the file will update only the changed records on the next deployment — existing records that haven't changed are skipped entirely. + +## Related Documentation + +- [Schema](./schema.md) — Defining table structure before loading data +- [Jobs](./jobs.md) — Bulk data operations via the Operations API (CSV/JSON import from file, URL, or S3) +- [Components](../components/overview.md) — Extension and plugin system that the data loader is built on diff --git a/reference/database/jobs.md b/reference/database/jobs.md new file mode 100644 index 00000000..63fd8e0e --- /dev/null +++ b/reference/database/jobs.md @@ -0,0 +1,272 @@ +--- +title: Jobs +--- + + + + + +# Jobs + +Harper uses an asynchronous job system for long-running data operations. When a bulk operation is initiated — such as loading a large CSV file or exporting millions of records — Harper starts a background job and immediately returns a job ID. Use the job ID to check progress and status. + +Job status values: + +- `IN_PROGRESS` — the job is currently running +- `COMPLETE` — the job finished successfully + +## Bulk Operations + +The following operations create jobs. All bulk operations are sent to the [Operations API](../operations-api/overview.md). + +### CSV Data Load + +Ingests CSV data provided directly in the request body. 
+ +- `operation` _(required)_ — `csv_data_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `data` _(required)_ — CSV content as a string + +```json +{ + "operation": "csv_data_load", + "database": "dev", + "action": "insert", + "table": "breed", + "data": "id,name,country\n1,Labrador,Canada\n2,Poodle,France\n" +} +``` + +Response: + +```json +{ + "message": "Starting job with id 2fe25039-566e-4670-8bb3-2db3d4e07e69", + "job_id": "2fe25039-566e-4670-8bb3-2db3d4e07e69" +} +``` + +--- + +### CSV File Load + +Ingests CSV data from a file on the server's local filesystem. + +> The CSV file must reside on the same machine running Harper. + +- `operation` _(required)_ — `csv_file_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `file_path` _(required)_ — absolute path to the CSV file on the host + +```json +{ + "operation": "csv_file_load", + "action": "insert", + "database": "dev", + "table": "breed", + "file_path": "/home/user/imports/breeds.csv" +} +``` + +--- + +### CSV URL Load + +Ingests CSV data from a URL. + +- `operation` _(required)_ — `csv_url_load` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `csv_url` _(required)_ — URL pointing to the CSV file + +```json +{ + "operation": "csv_url_load", + "action": "insert", + "database": "dev", + "table": "breed", + "csv_url": "https://s3.amazonaws.com/mydata/breeds.csv" +} +``` + +--- + +### Import from S3 + +Imports CSV or JSON files from an AWS S3 bucket. 
+ +- `operation` _(required)_ — `import_from_s3` +- `database` _(optional)_ — target database; defaults to `data` +- `table` _(required)_ — target table +- `action` _(optional)_ — `insert`, `update`, or `upsert`; defaults to `insert` +- `s3` _(required)_ — S3 connection details: + - `aws_access_key_id` + - `aws_secret_access_key` + - `bucket` + - `key` — filename including extension (`.csv` or `.json`) + - `region` + +```json +{ + "operation": "import_from_s3", + "action": "insert", + "database": "dev", + "table": "dog", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "dogs.json", + "region": "us-east-1" + } +} +``` + +--- + +### Export Local + +Exports table data to a local file in JSON or CSV format. + +- `operation` _(required)_ — `export_local` +- `format` _(required)_ — `json` or `csv` +- `path` _(required)_ — local directory path where the export file will be written +- `search_operation` _(required)_ — query to select records: `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` + +Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation for exports + +- `filename` _(optional)_ — filename without extension; auto-generated from epoch timestamp if omitted + +```json +{ + "operation": "export_local", + "format": "json", + "path": "/data/exports/", + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.breed" + } +} +``` + +--- + +### Export to S3 + +Exports table data to an AWS S3 bucket in JSON or CSV format. 
+ +Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation + +- `operation` _(required)_ — `export_to_s3` +- `format` _(required)_ — `json` or `csv` +- `s3` _(required)_ — S3 connection details (same fields as Import from S3, plus `key` for the output object name) +- `search_operation` _(required)_ — `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` + +```json +{ + "operation": "export_to_s3", + "format": "json", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET_KEY", + "bucket": "BUCKET_NAME", + "key": "exports/dogs.json", + "region": "us-east-1" + }, + "search_operation": { + "operation": "sql", + "sql": "SELECT * FROM dev.dog" + } +} +``` + +--- + +### Delete Records Before + +Deletes records older than a given timestamp from a table. Operates only on the local node — clustered replicas retain their data. + +_Restricted to `super_user` roles._ + +- `operation` _(required)_ — `delete_records_before` +- `schema` _(required)_ — database name +- `table` _(required)_ — table name +- `date` _(required)_ — records with `__createdtime__` before this timestamp are deleted. Format: `YYYY-MM-DDThh:mm:ss.sZ` + +```json +{ + "operation": "delete_records_before", + "date": "2024-01-01T00:00:00.000Z", + "schema": "dev", + "table": "breed" +} +``` + +## Managing Jobs + +### Get Job + +Returns status, metrics, and messages for a specific job by ID. 
+ +- `operation` _(required)_ — `get_job` +- `id` _(required)_ — job ID + +```json +{ + "operation": "get_job", + "id": "4a982782-929a-4507-8794-26dae1132def" +} +``` + +Response: + +```json +[ + { + "__createdtime__": 1611615798782, + "__updatedtime__": 1611615801207, + "created_datetime": 1611615798774, + "end_datetime": 1611615801206, + "id": "4a982782-929a-4507-8794-26dae1132def", + "job_body": null, + "message": "successfully loaded 350 of 350 records", + "start_datetime": 1611615798805, + "status": "COMPLETE", + "type": "csv_url_load", + "user": "HDB_ADMIN", + "start_datetime_converted": "2021-01-25T23:03:18.805Z", + "end_datetime_converted": "2021-01-25T23:03:21.206Z" + } +] +``` + +--- + +### Search Jobs by Start Date + +Returns all jobs started within a time window. + +_Restricted to `super_user` roles._ + +- `operation` _(required)_ — `search_jobs_by_start_date` +- `from_date` _(required)_ — start of the search window (ISO 8601 format) +- `to_date` _(required)_ — end of the search window (ISO 8601 format) + +```json +{ + "operation": "search_jobs_by_start_date", + "from_date": "2024-01-01T00:00:00.000+0000", + "to_date": "2024-01-02T00:00:00.000+0000" +} +``` + +## Related Documentation + +- [Data Loader](./data-loader.md) — Component-based data loading as part of deployment +- [Operations API](../operations-api/overview.md) — Sending operations to Harper +- [Transaction Logging](./transaction.md) — Recording a history of changes made to tables diff --git a/reference/database/overview.md b/reference/database/overview.md new file mode 100644 index 00000000..8025d577 --- /dev/null +++ b/reference/database/overview.md @@ -0,0 +1,123 @@ +--- +title: Overview +--- + + + + + +# Database + +Harper's database system is the foundation of its data storage and retrieval capabilities. 
It is built on top of [LMDB](https://www.symas.com/lmdb) (Lightning Memory-Mapped Database) and is designed to provide high performance, ACID-compliant storage with automatic indexing and flexible schema support. + +## How Harper Stores Data + +Harper organizes data in a three-tier hierarchy: + +- **Databases** — containers that group related tables together in a single transactional file +- **Tables** — collections of records with a common data pattern +- **Records** — individual data objects with a primary key and any number of attributes + +All tables within a database share the same transaction context, meaning reads and writes across tables in the same database can be performed atomically. + +### The Schema System and Auto-REST + +The most common way to use Harper's database is through the **schema system**. By defining a [GraphQL schema](./schema.md), you can: + +- Declare tables and their attribute types +- Control which attributes are indexed +- Define relationships between tables +- Automatically expose data via REST, MQTT, and other interfaces + +You do not need to build custom application code to use the database. A schema definition alone is enough to create fully functional, queryable REST endpoints for your data. + +For more advanced use cases, you can extend table behavior using the [Resource API](../resources/resource-api.md). 
+ +### Architecture Overview + +``` + ┌──────────┐ ┌──────────┐ + │ Clients │ │ Clients │ + └────┬─────┘ └────┬─────┘ + │ │ + ▼ ▼ + ┌────────────────────────────────────────┐ + │ │ + │ Socket routing/management │ + ├───────────────────────┬────────────────┤ + │ │ │ + │ Server Interfaces ─►│ Authentication │ + │ RESTful HTTP, MQTT │ Authorization │ + │ ◄─┤ │ + │ ▲ └────────────────┤ + │ │ │ │ + ├───┼──────────┼─────────────────────────┤ + │ │ │ ▲ │ + │ ▼ Resources ▲ │ ┌───────────┐ │ + │ │ └─┤ │ │ + ├─────────────────┴────┐ │ App │ │ + │ ├─►│ resources │ │ + │ Database tables │ └───────────┘ │ + │ │ ▲ │ + ├──────────────────────┘ │ │ + │ ▲ ▼ │ │ + │ ┌────────────────┐ │ │ + │ │ External │ │ │ + │ │ data sources ├────┘ │ + │ │ │ │ + │ └────────────────┘ │ + │ │ + └────────────────────────────────────────┘ +``` + +## Databases + +Harper databases hold a collection of tables in a single transactionally-consistent file. This means reads and writes can be performed atomically across all tables in the same database, and multi-table transactions are replicated as a single atomic unit. + +The default database is named `data`. Most applications will use this default. Additional databases can be created for namespace separation — this is particularly useful for components designed for reuse across multiple applications, where a unique database name avoids naming collisions. + +> **Note:** Transactions do not preserve atomicity across different databases, only across tables within the same database. + +## Tables + +Tables group records with a common data pattern. A table must have: + +- **Table name** — used to identify the table +- **Primary key** — the unique identifier for each record (also referred to as `hash_attribute` in the Operations API) + +Primary keys must be unique. 
If a primary key is not provided on insert, Harper auto-generates one: + +- A **UUID string** for primary keys typed as `String` or `ID` +- An **auto-incrementing integer** for primary keys typed as `Int`, `Long`, or `Any` + +Numeric primary keys are more efficient than UUIDs for large tables. + +## Dynamic vs. Defined Schemas + +Harper tables can operate in two modes: + +**Defined schemas** (recommended): Tables with schemas explicitly declared using [GraphQL schema syntax](./schema.md). This provides predictable structure, precise control over indexing, and data integrity. Schemas are declared in a component's `schema.graphql` file. + +**Dynamic schemas**: Tables created through the Operations API or Studio without a schema definition. Attributes are reflexively added as data is ingested. All top-level attributes are automatically indexed. Dynamic schema tables automatically maintain `__createdtime__` and `__updatedtime__` audit attributes on every record. + +It is best practice to define schemas for production tables. Dynamic schemas are convenient for experimentation and prototyping. 
+ +## Key Concepts + +For deeper coverage of each database feature, see the dedicated pages in this section: + +- **[Schema](./schema.md)** — Defining table structure, types, indexes, relationships, and computed properties using GraphQL schema syntax +- **[API](./api.md)** — The `tables`, `databases`, `transaction()`, and `createBlob()` globals for interacting with the database from code +- **[Data Loader](./data-loader.md)** — Loading seed or initial data into tables as part of component deployment +- **[Storage Algorithm](./storage-algorithm.md)** — How Harper stores data using LMDB with universal indexing and ACID compliance +- **[Jobs](./jobs.md)** — Asynchronous bulk data operations (CSV import/export, S3 import/export) +- **[System Tables](./system-tables.md)** — Harper internal tables for analytics, data loader state, and other system features +- **[Compaction](./compaction.md)** — Reducing database file size by eliminating fragmentation and free space +- **[Transaction Logging](./transaction.md)** — Recording and querying a history of data changes via audit log and transaction log + +## Related Documentation + +- [REST](../rest/overview.md) — HTTP interface built on top of the database resource system +- [Resources](../resources/overview.md) — Custom application logic extending database tables +- [Operations API](../operations-api/overview.md) — Direct database management operations (create/drop databases and tables, insert/update/delete records) +- [Configuration](../configuration/overview.md) — Storage configuration options (compression, blob paths, compaction) diff --git a/reference/database/schema.md b/reference/database/schema.md new file mode 100644 index 00000000..905aa1f9 --- /dev/null +++ b/reference/database/schema.md @@ -0,0 +1,503 @@ +--- +title: Schema +--- + + + + + + + + + + + +# Schema + +Harper uses GraphQL Schema Definition Language (SDL) to declaratively define table structure. 
Schema definitions are loaded from `.graphql` files in a component directory and control table creation, attribute types, indexing, and relationships. + +## Overview + +Added in: v4.2.0 + +Schemas are defined using standard [GraphQL type definitions](https://graphql.org/learn/schema/) with Harper-specific directives. A schema definition: + +- Ensures required tables exist when a component is deployed +- Enforces attribute types and required constraints +- Controls which attributes are indexed +- Defines relationships between tables +- Configures computed properties, expiration, and audit behavior + +Schemas are flexible by default — records may include additional properties beyond those declared in the schema. Use the `@sealed` directive to prevent this. + +A minimal example: + +```graphql +type Dog @table { + id: Long @primaryKey + name: String + breed: String + age: Int +} + +type Breed @table { + id: Long @primaryKey + name: String @indexed +} +``` + +### Loading Schemas + +In a component's `config.yaml`, specify the schema file with the `graphqlSchema` plugin: + +```yaml +graphqlSchema: + files: 'schema.graphql' +``` + +Keep in mind that both plugins and applications can specify schemas. + +## Type Directives + +Type directives apply to the entire table type definition. + +### `@table` + +Marks a GraphQL type as a Harper database table. The type name becomes the table name by default. 
+ +```graphql +type MyTable @table { + id: Long @primaryKey +} +``` + +Optional arguments: + +| Argument | Type | Default | Description | +| ------------ | --------- | -------------- | ----------------------------------------------------------------------- | +| `table` | `String` | type name | Override the table name | +| `database` | `String` | `"data"` | Database to place the table in | +| `expiration` | `Int` | — | Auto-expire records after this many seconds (useful for caching tables) | +| `audit` | `Boolean` | config default | Enable audit log for this table | + +**Examples:** + +```graphql +# Override table name +type Product @table(table: "products") { + id: Long @primaryKey +} + +# Place in a specific database +type Order @table(database: "commerce") { + id: Long @primaryKey +} + +# Auto-expire records after 1 hour (e.g., a session cache) +type Session @table(expiration: 3600) { + id: Long @primaryKey + userId: String +} + +# Enable audit log for this table explicitly +type AuditedRecord @table(audit: true) { + id: Long @primaryKey + value: String +} + +# Combine multiple arguments +type Event @table(database: "analytics", expiration: 86400) { + id: Long @primaryKey + name: String @indexed +} +``` + +**Database naming:** Since all tables default to the `data` database, when designing plugins or applications, consider using unique database names to avoid table naming collisions. + +### `@export` + +Exposes the table as an externally accessible resource endpoint, available via REST, MQTT, and other interfaces. + +```graphql +type MyTable @table @export(name: "my-table") { + id: Long @primaryKey +} +``` + +The optional `name` parameter specifies the URL path segment (e.g., `/my-table/`). Without `name`, the type name is used. + +### `@sealed` + +Prevents records from including any properties beyond those explicitly declared in the type. By default, Harper allows records to have additional properties. 
+ +```graphql +type StrictRecord @table @sealed { + id: Long @primaryKey + name: String +} +``` + +## Field Directives + +Field directives apply to individual attributes in a type definition. + +### `@primaryKey` + +Designates the attribute as the table's primary key. Primary keys must be unique; inserts with a duplicate primary key are rejected. + +```graphql +type Product @table { + id: Long @primaryKey + name: String +} +``` + +If no primary key is provided on insert, Harper auto-generates one: + +- **UUID string** — when type is `String` or `ID` +- **Auto-incrementing integer** — when type is `Int`, `Long`, or `Any` + +Changed in: v4.4.0 + +Auto-incrementing integer primary keys were added. Previously only UUID generation was supported for `ID` and `String` types. + +Using `Long` or `Any` is recommended for auto-generated numeric keys. `Int` is limited to 32-bit and may be insufficient for large tables. + +### `@indexed` + +Creates a secondary index on the attribute for fast querying. Required for filtering by this attribute in REST queries, SQL, or NoSQL operations. + +```graphql +type Product @table { + id: Long @primaryKey + category: String @indexed + price: Float @indexed +} +``` + +If the field value is an array, each element in the array is individually indexed, enabling queries by any individual value. + +Null values are indexed by default (added in v4.3.0), enabling queries like `GET /Product/?category=null`. + +### `@createdTime` + +Automatically assigns a creation timestamp (Unix epoch milliseconds) to the attribute when a record is created. + +```graphql +type Event @table { + id: Long @primaryKey + createdAt: Long @createdTime +} +``` + +### `@updatedTime` + +Automatically assigns a timestamp (Unix epoch milliseconds) each time the record is updated. 
+ +```graphql +type Event @table { + id: Long @primaryKey + updatedAt: Long @updatedTime +} +``` + +## Relationships + +Added in: v4.3.0 + +The `@relationship` directive defines how one table relates to another through a foreign key. Relationships enable join queries and allow related records to be selected as nested properties in query results. + +### `@relationship(from: attribute)` — many-to-one or many-to-many + +The foreign key is in this table, referencing the primary key of the target table. + +```graphql +type RealityShow @table @export { + id: Long @primaryKey + networkId: Long @indexed # foreign key + network: Network @relationship(from: networkId) # many-to-one + title: String @indexed +} + +type Network @table @export { + id: Long @primaryKey + name: String @indexed # e.g. "Bravo", "Peacock", "Netflix" +} +``` + +Query shows by network name: + +```http +GET /RealityShow?network.name=Bravo +``` + +If the foreign key is an array, this establishes a many-to-many relationship (e.g., a show with multiple streaming homes): + +```graphql +type RealityShow @table @export { + id: Long @primaryKey + networkIds: [Long] @indexed + networks: [Network] @relationship(from: networkIds) +} +``` + +### `@relationship(to: attribute)` — one-to-many or many-to-many + +The foreign key is in the target table, referencing the primary key of this table. The result type must be an array. + +```graphql +type Network @table @export { + id: Long @primaryKey + name: String @indexed # e.g. "Bravo", "Peacock", "Netflix" + shows: [RealityShow] @relationship(to: networkId) # one-to-many + # shows like "Real Housewives of Atlanta", "The Traitors", "Vanderpump Rules" +} +``` + +### `@relationship(from: attribute, to: attribute)` — foreign key to foreign key + +Both `from` and `to` can be specified together to define a relationship where neither side uses the primary key — a foreign key to foreign key join. 
This is useful for many-to-many relationships that join on non-primary-key attributes. + +```graphql +type OrderItem @table @export { + id: Long @primaryKey + orderId: Long @indexed + productSku: Long @indexed + product: Product @relationship(from: productSku, to: sku) # join on sku, not primary key +} + +type Product @table @export { + id: Long @primaryKey + sku: Long @indexed + name: String +} +``` + +Schemas can also define self-referential relationships, enabling parent-child hierarchies within a single table. + +## Computed Properties + +Added in: v4.4.0 + +The `@computed` directive marks a field as derived from other fields at query time. Computed properties are not stored in the database but are evaluated when the field is accessed. + +```graphql +type Product @table { + id: Long @primaryKey + price: Float + taxRate: Float + totalPrice: Float @computed(from: "price + (price * taxRate)") +} +``` + +The `from` argument is a JavaScript expression that can reference other record fields. + +Computed properties can also be defined in JavaScript for complex logic: + +```graphql +type Product @table { + id: Long @primaryKey + totalPrice: Float @computed +} +``` + +```javascript +tables.Product.setComputedAttribute('totalPrice', (record) => { + return record.price + record.price * record.taxRate; +}); +``` + +Computed properties are not included in query results by default — use `select` to include them explicitly. 
+ +### Computed Indexes + +Computed properties can be indexed with `@indexed`, enabling custom indexing strategies such as composite indexes, full-text search, or vector indexing: + +```graphql +type Product @table { + id: Long @primaryKey + tags: String + tagsSeparated: [String] @computed(from: "tags.split(/\\s*,\\s*/)") @indexed +} +``` + +When using a JavaScript function for an indexed computed property, use the `version` argument to ensure re-indexing when the function changes: + +```graphql +type Product @table { + id: Long @primaryKey + totalPrice: Float @computed(version: 1) @indexed +} +``` + +Increment `version` whenever the computation function changes. Failing to do so can result in an inconsistent index. + +## Vector Indexing + +Added in: v4.6.0 + +Use `@indexed(type: "HNSW")` to create a vector index using the Hierarchical Navigable Small World algorithm, designed for fast approximate nearest-neighbor search on high-dimensional vectors. + +```graphql +type Document @table { + id: Long @primaryKey + textEmbeddings: [Float] @indexed(type: "HNSW") +} +``` + +Query by nearest neighbors using the `sort` parameter: + +```javascript +let results = Document.search({ + sort: { attribute: 'textEmbeddings', target: searchVector }, + limit: 5, +}); +``` + +HNSW can be combined with filter conditions: + +```javascript +let results = Document.search({ + conditions: [{ attribute: 'price', comparator: 'lt', value: 50 }], + sort: { attribute: 'textEmbeddings', target: searchVector }, + limit: 5, +}); +``` + +### HNSW Parameters + +| Parameter | Default | Description | +| ---------------------- | ----------------- | --------------------------------------------------------------------------------------------------- | +| `distance` | `"cosine"` | Distance function: `"euclidean"` or `"cosine"` (negative cosine similarity) | +| `efConstruction` | `100` | Max nodes explored during index construction. 
Higher = better recall, lower = better performance | +| `M` | `16` | Preferred connections per graph layer. Higher = more space, better recall for high-dimensional data | +| `optimizeRouting` | `0.5` | Heuristic aggressiveness for omitting redundant connections (0 = off, 1 = most aggressive) | +| `mL` | computed from `M` | Normalization factor for level generation | +| `efSearchConstruction` | `50` | Max nodes explored during search | + +Example with custom parameters: + +```graphql +type Document @table { + id: Long @primaryKey + textEmbeddings: [Float] @indexed(type: "HNSW", distance: "euclidean", optimizeRouting: 0, efSearchConstruction: 100) +} +``` + +## Field Types + +Harper supports the following field types: + +| Type | Description | +| --------- | ---------------------------------------------------------------------------------------------- | +| `String` | Unicode text, UTF-8 encoded | +| `Int` | 32-bit signed integer (−2,147,483,648 to 2,147,483,647) | +| `Long` | 54-bit signed integer (−9,007,199,254,740,992 to 9,007,199,254,740,992) | +| `Float` | 64-bit double precision floating point | +| `BigInt` | Integer up to ~300 digits. Note: distinct JavaScript type; handle appropriately in custom code | +| `Boolean` | `true` or `false` | +| `ID` | String; indicates a non-human-readable identifier | +| `Any` | Any primitive, object, or array | +| `Date` | JavaScript `Date` object | +| `Bytes` | Binary data as `Buffer` or `Uint8Array` | +| `Blob` | Binary large object; designed for streaming content >20KB | + +Added `BigInt` in v4.3.0 + +Added `Blob` in v4.5.0 + +Arrays of a type are expressed with `[Type]` syntax (e.g., `[Float]` for a vector). + +### Blob Type + +Added in: v4.5.0 + +`Blob` fields are designed for large binary content. Harper's `Blob` type implements the [Web API `Blob` interface](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so all standard `Blob` methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`) are available. 
Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to be held entirely in memory. Use `Blob` for content typically larger than 20KB (images, video, audio, large HTML, etc.). + +See [Blob usage details](#blob-usage) below. + +#### Blob Usage + +Declare a blob field: + +```graphql +type MyTable @table { + id: Any! @primaryKey + data: Blob +} +``` + +Create and store a blob using [`createBlob()`](./api.md#createblobdata-options): + +```javascript +let blob = createBlob(largeBuffer); +await MyTable.put({ id: 'my-record', data: blob }); +``` + +Retrieve blob data using standard Web API `Blob` methods: + +```javascript +let record = await MyTable.get('my-record'); +let buffer = await record.data.bytes(); // Uint8Array +let text = await record.data.text(); // string +let stream = record.data.stream(); // ReadableStream +``` + +Blobs support asynchronous streaming, meaning a record can reference a blob before it is fully written to storage. Use `saveBeforeCommit: true` to wait for full write before committing: + +```javascript +let blob = createBlob(stream, { saveBeforeCommit: true }); +await MyTable.put({ id: 'my-record', data: blob }); +``` + +Any string or buffer assigned to a `Blob` field in a `put`, `patch`, or `publish` is automatically coerced to a `Blob`. 
+ +When returning a blob via REST, register an error handler to handle interrupted streams: + +```javascript +export class MyEndpoint extends MyTable { + async get(target) { + const record = super.get(target); + let blob = record.data; + blob.on('error', () => { + MyTable.invalidate(target); + }); + return { status: 200, headers: {}, body: blob }; + } +} +``` + +## Dynamic Schema Behavior + +When a table is created through the Operations API or Studio without a schema definition, it follows dynamic schema behavior: + +- Attributes are reflexively created as data is ingested +- All top-level attributes are automatically indexed +- Records automatically get `__createdtime__` and `__updatedtime__` audit attributes + +Dynamic schema tables are additive — new attributes are added as new data arrives. Existing records will have `null` for any newly added attributes. + +Use `create_attribute` and `drop_attribute` operations to manually manage attributes on dynamic schema tables. See the [Operations API](../operations-api/operations.md#databases--tables) for details. + +## OpenAPI Specification + +Tables exported with `@export` are described via an `/openapi` endpoint on the main HTTP server associated with the REST service (default port 9926). + +```http +GET http://localhost:9926/openapi +``` + +This provides an OpenAPI 3.x description of all exported resource endpoints. The endpoint is a starting guide and may not cover every edge case. + +## Renaming Tables + +Harper does **not** support renaming tables. Changing a type name in a schema definition creates a new, empty table — the original table and its data are unaffected. 
+ +## Related Documentation + +- [JavaScript API](./api.md) — `tables`, `databases`, `transaction()`, and `createBlob()` globals for working with schema-defined tables in code +- [Data Loader](./data-loader.md) — Seed tables with initial data alongside schema deployment +- [REST Querying](../rest/querying.md) — Querying tables via HTTP using schema-defined attributes and relationships +- [Resources](../resources/resource-api.md) — Extending table behavior with custom application logic +- [Storage Algorithm](./storage-algorithm.md) — How Harper indexes and stores schema-defined data +- [Configuration](../configuration/options.md) — Component configuration for schemas diff --git a/reference/database/sql.md b/reference/database/sql.md new file mode 100644 index 00000000..d27fec67 --- /dev/null +++ b/reference/database/sql.md @@ -0,0 +1,345 @@ +--- +title: SQL +--- + + + + + + + + + + +:::warning +SQL querying is not recommended for production use or on large tables. SQL queries often do not utilize indexes and are not optimized for performance. Use the [REST interface](../rest/overview.md) for production data access — it provides a more stable, secure, and performant interface. SQL is intended for ad-hoc data investigation and administrative queries. +::: + +Harper includes a SQL interface supporting SELECT, INSERT, UPDATE, and DELETE operations. Tables are referenced using `database.table` notation (e.g., `dev.dog`). 
+ +## Operations API + +SQL queries are executed via the Operations API using the `sql` operation: + +- `operation` _(required)_ — must be `sql` +- `sql` _(required)_ — the SQL statement to execute + +### Select + +```json +{ + "operation": "sql", + "sql": "SELECT * FROM dev.dog WHERE id = 1" +} +``` + +### Insert + +```json +{ + "operation": "sql", + "sql": "INSERT INTO dev.dog (id, dog_name) VALUE (22, 'Simon')" +} +``` + +Response: + +```json +{ + "message": "inserted 1 of 1 records", + "inserted_hashes": [22], + "skipped_hashes": [] +} +``` + +### Update + +```json +{ + "operation": "sql", + "sql": "UPDATE dev.dog SET dog_name = 'penelope' WHERE id = 1" +} +``` + +### Delete + +```json +{ + "operation": "sql", + "sql": "DELETE FROM dev.dog WHERE id = 1" +} +``` + +--- + +## SELECT Syntax + +```sql +SELECT * FROM dev.dog +SELECT id, dog_name, age FROM dev.dog +SELECT * FROM dev.dog ORDER BY age +SELECT * FROM dev.dog ORDER BY age DESC +SELECT DISTINCT breed_id FROM dev.dog +SELECT COUNT(*) FROM dev.dog WHERE age > 3 +``` + +### Joins + +Supported join types: `INNER JOIN`, `LEFT [OUTER] JOIN`, `RIGHT [OUTER] JOIN`, `FULL OUTER JOIN`, `CROSS JOIN`. 
+ +```sql +SELECT d.id, d.dog_name, b.name +FROM dev.dog AS d +INNER JOIN dev.breed AS b ON d.breed_id = b.id +WHERE d.owner_name IN ('Kyle', 'Zach') +ORDER BY d.dog_name +``` + +--- + +## Features Matrix + +| INSERT | | +| ---------------------------------- | --- | +| Values — multiple values supported | ✔ | +| Sub-SELECT | ✗ | + +| UPDATE | | +| -------------- | --- | +| SET | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | +| Date Functions | ✔ | +| Math Functions | ✔ | + +| DELETE | | +| ---------- | --- | +| FROM | ✔ | +| Sub-SELECT | ✗ | +| Conditions | ✔ | + +| SELECT | | +| ------------------- | --- | +| Column SELECT | ✔ | +| Aliases | ✔ | +| Aggregate Functions | ✔ | +| Date Functions | ✔ | +| Math Functions | ✔ | +| Constant Values | ✔ | +| DISTINCT | ✔ | +| Sub-SELECT | ✗ | + +| FROM | | +| ---------------- | --- | +| Multi-table JOIN | ✔ | +| INNER JOIN | ✔ | +| LEFT OUTER JOIN | ✔ | +| LEFT INNER JOIN | ✔ | +| RIGHT OUTER JOIN | ✔ | +| RIGHT INNER JOIN | ✔ | +| FULL JOIN | ✔ | +| UNION | ✗ | +| Sub-SELECT | ✗ | +| TOP | ✔ | + +| WHERE | | +| ---------------- | --- | +| Multi-Conditions | ✔ | +| Wildcards | ✔ | +| IN | ✔ | +| LIKE | ✔ | +| AND, OR, NOT | ✔ | +| NULL | ✔ | +| BETWEEN | ✔ | +| EXISTS, ANY, ALL | ✔ | +| Compare columns | ✔ | +| Date Functions | ✔ | +| Sub-SELECT | ✗ | + +| GROUP BY | | +| --------------------- | --- | +| Multi-Column GROUP BY | ✔ | + +| HAVING | | +| ----------------------------- | --- | +| Aggregate function conditions | ✔ | + +| ORDER BY | | +| --------------------- | --- | +| Multi-Column ORDER BY | ✔ | +| Aliases | ✔ | + +--- + +## Functions + +### Aggregate + +| Function | Description | +| ---------------------- | ----------------------------------------------------- | +| `AVG(expr)` | Average of a numeric expression. | +| `COUNT(col)` | Count of rows matching the criteria (nulls excluded). | +| `MAX(col)` | Largest value in a column. | +| `MIN(col)` | Smallest value in a column. | +| `SUM(col)` | Sum of numeric values. 
| +| `GROUP_CONCAT(expr)` | Comma-separated string of non-null values. | +| `ARRAY(expr)` | Returns a list of data as a field. | +| `DISTINCT_ARRAY(expr)` | Returns a deduplicated list. | + +### Conversion + +| Function | Description | +| ---------------------------------- | ------------------------------------------ | +| `CAST(expr AS datatype)` | Converts a value to the specified type. | +| `CONVERT(datatype, expr[, style])` | Converts a value from one type to another. | + +### String + +| Function | Description | +| ----------------------------- | ------------------------------------------------------- | +| `CONCAT(s1, s2, ...)` | Joins strings together. | +| `CONCAT_WS(sep, s1, s2, ...)` | Joins strings with a separator. | +| `INSTR(s1, s2)` | Position of s2 within s1. | +| `LEN(s)` | Length of a string. | +| `LOWER(s)` | Converts to lower-case. | +| `UPPER(s)` | Converts to upper-case. | +| `REPLACE(s, old, new)` | Replaces all instances of old with new. | +| `SUBSTRING(s, pos, len)` | Extracts a substring. | +| `TRIM([chars FROM] s)` | Removes leading and trailing spaces or specified chars. | +| `REGEXP pattern` | Matches a regular expression pattern. | +| `REGEXP_LIKE(col, pattern)` | Matches a regular expression pattern (function form). | + +### Mathematical + +| Function | Description | +| ------------------ | --------------------------------------- | +| `ABS(expr)` | Absolute value. | +| `CEIL(n)` | Smallest integer ≥ n. | +| `FLOOR(n)` | Largest integer ≤ n. | +| `EXP(n)` | e to the power of n. | +| `ROUND(n, places)` | Rounds to the specified decimal places. | +| `SQRT(expr)` | Square root. | +| `RANDOM(seed)` | Pseudo-random number. | + +### Logical + +| Function | Description | +| -------------------------------- | ------------------------------------------------------- | +| `IF(cond, true_val, false_val)` | Returns one of two values based on a condition. | +| `IIF(cond, true_val, false_val)` | Alias for IF. 
| +| `IFNULL(expr, alt)` | Returns alt if expr is null. | +| `NULLIF(expr1, expr2)` | Returns null if expr1 = expr2, otherwise returns expr1. | + +--- + +## Date & Time Functions + +All SQL date operations use UTC internally. Dates are parsed as [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), then [RFC 2822](https://tools.ietf.org/html/rfc2822#section-3.3), then `new Date(string)`. + +| Function | Returns | +| ------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `CURRENT_DATE()` | Current date as `YYYY-MM-DD`. | +| `CURRENT_TIME()` | Current time as `HH:mm:ss.SSS`. | +| `CURRENT_TIMESTAMP` | Current Unix timestamp in milliseconds. | +| `NOW()` | Current Unix timestamp in milliseconds. | +| `GETDATE()` | Current Unix timestamp in milliseconds. | +| `GET_SERVER_TIME()` | Current date/time in server's timezone as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. | +| `DATE([date_string])` | Date formatted as `YYYY-MM-DDTHH:mm:ss.SSSZZ`. | +| `DATE_ADD(date, value, interval)` | Adds time to a date; returns Unix ms. | +| `DATE_SUB(date, value, interval)` | Subtracts time from a date; returns Unix ms. | +| `DATE_DIFF(date1, date2[, interval])` | Difference between two dates. | +| `DATE_FORMAT(date, format)` | Formats a date using [moment.js format strings](https://momentjs.com/docs/#/displaying/format/). | +| `EXTRACT(date, date_part)` | Extracts a part (year, month, day, hour, minute, second, millisecond). | +| `OFFSET_UTC(date, offset)` | Returns the date adjusted by offset minutes (or hours if < 16). | +| `DAY(date)` | Day of the month. | +| `DAYOFWEEK(date)` | Day of the week (0=Sunday … 6=Saturday). | +| `HOUR(datetime)` | Hour part (0–838). | +| `MINUTE(datetime)` | Minute part (0–59). | +| `MONTH(date)` | Month (1–12). | +| `SECOND(datetime)` | Seconds part (0–59). | +| `YEAR(date)` | Year. 
| + +`DATE_ADD` and `DATE_SUB` accept these interval values: + +| Key | Shorthand | +| ------------ | --------- | +| years | y | +| quarters | Q | +| months | M | +| weeks | w | +| days | d | +| hours | h | +| minutes | m | +| seconds | s | +| milliseconds | ms | + +--- + +## JSON Search + +`SEARCH_JSON(expression, attribute)` queries nested JSON data that is not indexed by Harper. It uses the [JSONata](https://docs.jsonata.org/overview.html) library and works in both SELECT and WHERE clauses. + +```sql +-- Find records where the name array contains "Harper" +SELECT * FROM dev.dog +WHERE SEARCH_JSON('"Harper" in *', name) +``` + +```sql +-- Select and filter nested JSON in one query +SELECT m.title, + SEARCH_JSON($[name in ["Actor A", "Actor B"]].{"actor": name}, c.`cast`) AS cast +FROM movies.credits c +INNER JOIN movies.movie m ON c.movie_id = m.id +WHERE SEARCH_JSON($count($[name in ["Actor A", "Actor B"]]), c.`cast`) >= 2 +``` + +--- + +## Geospatial Functions + +Geospatial data must be stored using the [GeoJSON standard](https://geojson.org/) in a single column. All coordinates are in `[longitude, latitude]` format. + +| Function | Description | +| -------------------------------------------- | ------------------------------------------------------------------ | +| `geoArea(geoJSON)` | Area of features in square meters. | +| `geoLength(geoJSON[, units])` | Length in km (default), or degrees/radians/miles. | +| `geoDistance(point1, point2[, units])` | Distance between two points. | +| `geoNear(point1, point2, distance[, units])` | Returns boolean: true if points are within the specified distance. | +| `geoContains(geo1, geo2)` | Returns boolean: true if geo2 is completely contained by geo1. | +| `geoDifference(polygon1, polygon2)` | Returns a new polygon with polygon2 clipped from polygon1. | +| `geoEqual(geo1, geo2)` | Returns boolean: true if two GeoJSON features are identical. 
| +| `geoCrosses(geo1, geo2)` | Returns boolean: true if the geometries cross each other. | +| `geoConvert(coordinates, geo_type[, props])` | Converts coordinates into a GeoJSON of the specified type. | + +`units` options: `'degrees'`, `'radians'`, `'miles'`, `'kilometers'` (default). + +`geo_type` options for `geoConvert`: `'point'`, `'lineString'`, `'multiLineString'`, `'multiPoint'`, `'multiPolygon'`, `'polygon'`. + +--- + +## Logical Operators + +| Keyword | Description | +| --------- | ------------------------------------------------ | +| `BETWEEN` | Returns values within a given range (inclusive). | +| `IN` | Specifies multiple values in a WHERE clause. | +| `LIKE` | Searches for a pattern. | + +--- + +## Reserved Words + +If a database, table, or attribute name conflicts with a reserved word, wrap it in backticks or brackets: + +```sql +SELECT * FROM data.`ASSERT` +SELECT * FROM data.[ASSERT] +``` + +
+Full reserved word list + +ABSOLUTE, ACTION, ADD, AGGR, ALL, ALTER, AND, ANTI, ANY, APPLY, ARRAY, AS, ASSERT, ASC, ATTACH, AUTOINCREMENT, AUTO_INCREMENT, AVG, BEGIN, BETWEEN, BREAK, BY, CALL, CASE, CAST, CHECK, CLASS, CLOSE, COLLATE, COLUMN, COLUMNS, COMMIT, CONSTRAINT, CONTENT, CONTINUE, CONVERT, CORRESPONDING, COUNT, CREATE, CROSS, CUBE, CURRENT_TIMESTAMP, CURSOR, DATABASE, DECLARE, DEFAULT, DELETE, DELETED, DESC, DETACH, DISTINCT, DOUBLEPRECISION, DROP, ECHO, EDGE, END, ENUM, ELSE, EXCEPT, EXISTS, EXPLAIN, FALSE, FETCH, FIRST, FOREIGN, FROM, GO, GRAPH, GROUP, GROUPING, HAVING, HDB_HASH, HELP, IF, IDENTITY, IS, IN, INDEX, INNER, INSERT, INSERTED, INTERSECT, INTO, JOIN, KEY, LAST, LET, LEFT, LIKE, LIMIT, LOOP, MATCHED, MATRIX, MAX, MERGE, MIN, MINUS, MODIFY, NATURAL, NEXT, NEW, NOCASE, NO, NOT, NULL, OFF, ON, ONLY, OFFSET, OPEN, OPTION, OR, ORDER, OUTER, OVER, PATH, PARTITION, PERCENT, PLAN, PRIMARY, PRINT, PRIOR, QUERY, READ, RECORDSET, REDUCE, REFERENCES, RELATIVE, REPLACE, REMOVE, RENAME, REQUIRE, RESTORE, RETURN, RETURNS, RIGHT, ROLLBACK, ROLLUP, ROW, SCHEMA, SCHEMAS, SEARCH, SELECT, SEMI, SET, SETS, SHOW, SOME, SOURCE, STRATEGY, STORE, SYSTEM, SUM, TABLE, TABLES, TARGET, TEMP, TEMPORARY, TEXTSTRING, THEN, TIMEOUT, TO, TOP, TRAN, TRANSACTION, TRIGGER, TRUE, TRUNCATE, UNION, UNIQUE, UPDATE, USE, USING, VALUE, VERTEX, VIEW, WHEN, WHERE, WHILE, WITH, WORK + +
diff --git a/reference/database/storage-algorithm.md b/reference/database/storage-algorithm.md new file mode 100644 index 00000000..35af971b --- /dev/null +++ b/reference/database/storage-algorithm.md @@ -0,0 +1,111 @@ +--- +title: Storage Algorithm +--- + + + + +# Storage Algorithm + +Harper's storage algorithm is the foundation of all database functionality. It is built on top of [LMDB](https://www.symas.com/lmdb) (Lightning Memory-Mapped Database), a high-performance key-value store, and extends it with automatic indexing, query-language-agnostic data access, and ACID compliance. + +## Query Language Agnostic + +Harper's storage layer is decoupled from any specific query language. Data inserted via NoSQL operations can be read via SQL, REST, or the Resource API — all accessing the same underlying storage. This architecture allows Harper to add new query interfaces without changing how data is stored. + +## ACID Compliance + +Harper provides full ACID compliance on each node using Multi-Version Concurrency Control (MVCC) through LMDB: + +- **Atomicity**: All writes in a transaction either fully commit or fully roll back +- **Consistency**: Each transaction moves data from one valid state to another +- **Isolation**: Readers and writers operate independently — readers do not block writers and writers do not block readers +- **Durability**: Committed transactions are persisted to disk + +Each Harper table has a single writer process, eliminating deadlocks and ensuring writes are executed in the order received. Multiple reader processes can operate concurrently for high-throughput reads. + +## Universally Indexed + +Changed in: v4.3.0 — Storage performance improvements including better free-space management + +For [dynamic schema tables](./overview.md#dynamic-vs-defined-schemas), all top-level attributes are automatically indexed immediately upon ingestion — Harper reflexively creates the attribute and its index as new data arrives. 
For [schema-defined tables](./schema.md), indexes are created for all attributes marked with `@indexed`. + +Indexes are type-agnostic, ordering values as follows: + +1. Booleans +2. Numbers (ordered numerically) +3. Strings (ordered lexically) + +### LMDB Storage Layout + +Within the LMDB implementation, table records are grouped into a single LMDB environment file. Each attribute index is stored as a sub-database (`dbi`) within that environment. + +## Compression + +Changed in: v4.3.0 — Compression is now enabled by default for all records over 4KB + +Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](../configuration/options.md). Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). + +## Performance Characteristics + +Harper inherits the following performance properties from LMDB: + +- **Memory-mapped I/O**: Data is accessed via memory mapping, enabling fast reads without data duplication between disk and memory +- **Buffer cache integration**: Fully exploits the OS buffer cache for reduced I/O +- **CPU cache optimization**: Built to maximize data locality within CPU caches +- **Deadlock-free writes**: Full serialization of writers guarantees write ordering without deadlocks +- **Zero-copy reads**: Readers access data directly from the memory map without copying + +## Indexing Example + +Given a table with records like this: + +``` +┌────┬────────┬────────┐ +│ id │ field1 │ field2 │ +├────┼────────┼────────┤ +│ 1 │ A │ X │ +│ 2 │ 25 │ X │ +│ 3 │ -1 │ Y │ +│ 4 │ A │ │ +│ 5 │ true │ 2 │ +└────┴────────┴────────┘ +``` + +Harper maintains three separate LMDB sub-databases for that table: + +``` +Table (LMDB environment file) +│ +├── primary index: id +│ ┌─────┬──────────────────────────────────────┐ +│ │ Key │ Value (full record) │ +│ ├─────┼──────────────────────────────────────┤ +│ │ 1 │ { 
id:1, field1:"A", field2:"X" } │ +│ │ 2 │ { id:2, field1:25, field2:"X" } │ +│ │ 3 │ { id:3, field1:-1, field2:"Y" } │ +│ │ 4 │ { id:4, field1:"A" } │ +│ │ 5 │ { id:5, field1:true, field2:2 } │ +│ └─────┴──────────────────────────────────────┘ +│ +├── secondary index: field1 secondary index: field2 +│ ┌────────┬───────┐ ┌────────┬───────┐ +│ │ Key │ Value │ │ Key │ Value │ +│ ├────────┼───────┤ ├────────┼───────┤ +│ │ -1 │ 3 │ │ 2 │ 5 │ +│ │ 25 │ 2 │ │ X │ 1 │ +│ │ A │ 1 │ │ X │ 2 │ +│ │ A │ 4 │ │ Y │ 3 │ +│ │ true │ 5 │ └────────┴───────┘ +│ └────────┴───────┘ +``` + +Secondary indexes store the attribute value as the key and the record's primary key (`id`) as the value. To resolve a query result, Harper looks up the matching ids in the secondary index, then fetches the full records from the primary index. + +Indexes are ordered — booleans first, then numbers (numerically), then strings (lexically) — enabling efficient range queries across all types. + +## Related Documentation + +- [Schema](./schema.md) — Defining indexed attributes and vector indexes +- [Compaction](./compaction.md) — Reclaiming free space and applying new storage configuration to existing databases +- [Configuration](../configuration/options.md) — Storage configuration options (compression, memory maps, blob paths) diff --git a/reference/database/system-tables.md b/reference/database/system-tables.md new file mode 100644 index 00000000..683dfb6e --- /dev/null +++ b/reference/database/system-tables.md @@ -0,0 +1,158 @@ +--- +title: System Tables +--- + + + + + +# System Tables + +Harper maintains a set of internal system tables in the `system` database. These tables store analytics, job tracking, replication configuration, and other internal state. Most are read-only from the application perspective; some can be queried for observability or management purposes. + +System tables are prefixed with `hdb_` and reside in the `system` database. 
+ +## Analytics Tables + +Added in: v4.5.0 (resource and storage analytics expansion) + +### `hdb_raw_analytics` + +Stores per-second, per-thread performance metrics. Records are written once per second (when there is activity) and include metrics for all operations, URL endpoints, and messaging topics, plus system resource information such as memory and CPU utilization. + +Records have a primary key equal to the timestamp in milliseconds since Unix epoch. + +Query with `search_by_conditions` (requires `superuser` permission): + +```json +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_raw_analytics", + "conditions": [ + { + "search_attribute": "id", + "search_type": "between", + "search_value": [1688594000000, 1688594010000] + } + ] +} +``` + +A typical record: + +```json +{ + "time": 1688594390708, + "period": 1000.8336279988289, + "metrics": [ + { + "metric": "bytes-sent", + "path": "search_by_conditions", + "type": "operation", + "median": 202, + "mean": 202, + "p95": 202, + "p90": 202, + "count": 1 + }, + { + "metric": "memory", + "threadId": 2, + "rss": 1492664320, + "heapTotal": 124596224, + "heapUsed": 119563120, + "external": 3469790, + "arrayBuffers": 798721 + }, + { + "metric": "utilization", + "idle": 138227.52767700003, + "active": 70.5066209952347, + "utilization": 0.0005098165086230495 + } + ], + "threadId": 2, + "totalBytesProcessed": 12182820, + "id": 1688594390708.6853 +} +``` + +### `hdb_analytics` + +Stores per-minute aggregate analytics. Once per minute, Harper aggregates all per-second raw entries from all threads into summary records in this table. Query it for longer-term performance trends. 
+ +```json +{ + "operation": "search_by_conditions", + "schema": "system", + "table": "hdb_analytics", + "conditions": [ + { + "search_attribute": "id", + "search_type": "between", + "search_value": [1688194100000, 1688594990000] + } + ] +} +``` + +A typical aggregate record: + +```json +{ + "period": 60000, + "metric": "bytes-sent", + "method": "connack", + "type": "mqtt", + "median": 4, + "mean": 4, + "p95": 4, + "p90": 4, + "count": 1, + "id": 1688589569646, + "time": 1688589569646 +} +``` + +For a full reference of available metrics and their fields, see [Analytics](../analytics/overview.md 'Complete analytics metrics reference'). + +## Data Loader Table + +### `hdb_dataloader_hash` + +Added in: v4.6.0 + +Used internally by the [Data Loader](./data-loader.md) to track which records have been loaded and detect changes. Stores SHA-256 content hashes of data file records so that unchanged records are not re-written on subsequent deployments. + +This table is managed automatically by the Data Loader. No direct interaction is required. + +## Replication Tables + +### `hdb_nodes` + +Stores the configuration and state of known nodes in a cluster, including connection details, replication settings, and revoked certificate serial numbers. + +Can be queried to inspect the current replication topology: + +```json +{ + "operation": "search_by_hash", + "schema": "system", + "table": "hdb_nodes", + "hash_values": ["node-id"] +} +``` + +Used by the `add_node`, `update_node`, and related clustering operations. See [Replication](../replication/clustering.md) for details. + +### `hdb_certificate` + +Stores TLS certificates used in replication. Can be queried to inspect the certificates currently known to the cluster. 
+ +## Related Documentation + +- [Analytics](../analytics/overview.md) — Full reference for analytics metrics tracked in `hdb_analytics` and `hdb_raw_analytics` +- [Data Loader](./data-loader.md) — Component that writes to `hdb_dataloader_hash` +- [Replication](../replication/overview.md) — Clustering and replication system that uses `hdb_nodes` and `hdb_certificate` +- [Operations API](../operations-api/overview.md) — Querying system tables using `search_by_conditions` diff --git a/reference/database/transaction.md b/reference/database/transaction.md new file mode 100644 index 00000000..3ae8847f --- /dev/null +++ b/reference/database/transaction.md @@ -0,0 +1,154 @@ +--- +title: Transaction Logging +--- + + + + + + + + +# Transaction Logging + +Harper provides two complementary mechanisms for recording a history of data changes on a table: the **audit log** and the **transaction log**. Both are available at the table level and serve different use cases. + +| Feature | Audit Log | Transaction Log | +| ----------------------------- | --------------------------------- | ------------------------------ | +| Storage | Standard Harper table (per-table) | Clustering streams (per-table) | +| Requires clustering | No | Yes | +| Available since | v4.1.0 | v4.1.0 | +| Stores original record values | Yes | No | +| Query by username | Yes | No | +| Query by primary key | Yes | No | +| Used for real-time messaging | Yes (required) | No | + +## Audit Log + +Available since: v4.1.0 + +The audit log is a data store that tracks every transaction across all tables in a database. Harper automatically creates and maintains a single audit log per database. The audit log captures the operation type, the user who made the change, the timestamp, and both the new and original record values. + +The audit log is **enabled by default**. To disable it, set [`logging.auditLog`](../logging/configuration.md) to `false` in `harperdb-config.yaml` and restart Harper. 
+ +> The audit log is required for real-time messaging (WebSocket and MQTT subscriptions) and replication. Do not disable it if real-time features or replication are in use. + +### Audit Log Operations + +#### `read_audit_log` + +Queries the audit log for a specific table. Supports filtering by timestamp, username, or primary key value. + +**By timestamp:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "timestamp", + "search_values": [1660585740558] +} +``` + +Timestamp behavior: + +| `search_values` | Result | +| --------------- | ---------------------------------------- | +| `[]` | All records for the table | +| `[timestamp]` | All records after the provided timestamp | +| `[from, to]` | Records between the two timestamps | + +**By username:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "username", + "search_values": ["admin"] +} +``` + +**By primary key:** + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "hash_value", + "search_values": [318] +} +``` + +**Response example:** + +```json +{ + "operation": "update", + "user_name": "HDB_ADMIN", + "timestamp": 1607035559122.277, + "hash_values": [1, 2], + "records": [ + { + "id": 1, + "breed": "Muttzilla", + "age": 6, + "__updatedtime__": 1607035559122 + } + ], + "original_records": [ + { + "__createdtime__": 1607035556801, + "__updatedtime__": 1607035556801, + "age": 5, + "breed": "Mutt", + "id": 1, + "name": "Harper" + } + ] +} +``` + +The `original_records` field contains the record state before the operation was applied. + +#### `delete_audit_logs_before` + +Deletes audit log entries older than the specified timestamp. 
+ +Changed in: v4.3.0 — Audit log cleanup improved to reduce resource consumption during scheduled cleanups + +Changed in: v4.5.0 — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold + +```json +{ + "operation": "delete_audit_logs_before", + "schema": "dev", + "table": "dog", + "timestamp": 1598290282817 +} +``` + +--- + +## Enabling Audit Log Per Table + +You can enable or disable the audit log for individual tables using the `@table` directive's `audit` argument in your schema: + +```graphql +type Dog @table(audit: true) { + id: Long @primaryKey + name: String +} +``` + +This overrides the [`logging.auditLog`](../logging/configuration.md) global configuration for that specific table. + +## Related Documentation + +- [Logging](../logging/overview.md) — Application and system logging (separate from transaction/audit logging) +- [Replication](../replication/overview.md) — Clustering setup required for transaction logs +- [Logging Configuration](../logging/configuration.md) — Global audit log configuration (`logging.auditLog`) +- [Operations API](../operations-api/overview.md) — Sending operations to Harper diff --git a/reference/environment-variables/overview.md b/reference/environment-variables/overview.md new file mode 100644 index 00000000..0c72b83b --- /dev/null +++ b/reference/environment-variables/overview.md @@ -0,0 +1,77 @@ +--- +id: overview +title: Environment Variables +--- + + + + + +Harper supports loading environment variables in Harper applications `process.env` using the built-in `loadEnv` plugin. This is the standard way to supply secrets and configuration to your Harper components without hardcoding values. `loadEnv` does **not** need to be installed as it is built into Harper and only needs to be declared in your `config.yaml`. 
+ +:::note +If you are looking for information on how to configure your Harper installation using environment variables, see [Configuration](../configuration/overview.md) section for more information. +::: + +## Basic Usage + +```yaml +loadEnv: + files: '.env' +``` + +This loads the `.env` file from the root of your component directory into `process.env`. + +## Load Order + +> **Important:** Specify `loadEnv` first in your `config.yaml` so that environment variables are loaded before any other components start. + +```yaml +# config.yaml — loadEnv must come first +loadEnv: + files: '.env' + +rest: true + +myApp: + files: './src/*.js' +``` + +Because Harper is a single-process application, environment variables are loaded onto `process.env` and are shared across all components. As long as `loadEnv` is listed before dependent components, those components will have access to the loaded variables. + +## Override Behavior + +By default, `loadEnv` follows the standard dotenv convention: **existing environment variables take precedence** over values in `.env` files. This means variables already set in the shell or container environment will not be overwritten. + +To override existing environment variables, use the `override` option: + +```yaml +loadEnv: + files: '.env' + override: true +``` + +## Multiple Files + +As a Harper plugin, `loadEnv` supports multiple files using either glob patterns or a list of files in the configuration: + +```yaml +loadEnv: + files: + - '.env' + - '.env.local' +``` + +or + +```yaml +loadEnv: + files: 'env-vars/*' +``` + +Files are loaded in the order specified. 
+ +## Related + +- [Components Overview](../components/overview.md) +- [Configuration](../configuration/overview.md) diff --git a/reference/fastify-routes/overview.md b/reference/fastify-routes/overview.md new file mode 100644 index 00000000..ddb5d3df --- /dev/null +++ b/reference/fastify-routes/overview.md @@ -0,0 +1,127 @@ +--- +title: Define Fastify Routes +--- + + + +# Define Fastify Routes + +:::note +Fastify routes are discouraged in favor of modern routing with [Custom Resources](../resources/overview.md), but remain a supported feature for backwards compatibility and specific use cases. +::: + +Harper provides a built-in plugin for loading [Fastify](https://www.fastify.io/) routes as a way to define custom endpoints for your Harper application. While we generally recommend building your endpoints/APIs with Harper's [REST interface](../rest/overview.md) for better performance and standards compliance, Fastify routes can provide an extensive API for highly customized path handling. Below is a very simple example of a route declaration. + +The fastify route handler can be configured in your application's config.yaml (this is the default config if you used the [application template](https://github.com/HarperDB/application-template)): + +```yaml +fastifyRoutes: + files: routes/*.js # specify the location of route definition modules +``` + +By default, route URLs are configured to be: + +``` +[protocol]://[instance-host]:[port]/[project-name]/[route-url] +``` + +However, you can specify the path to be `/` if you wish to have your routes handling the root path of incoming URLs. + +- The route below, using the default config, within the **dogs** project, with a route of **breeds** would be available at **[http://localhost:9926/dogs/breeds](http://localhost:9926/dogs/breeds)**. + +In effect, this route is just a pass-through to Harper. 
The same result could have been achieved by hitting the core Harper API, since it uses **hdbCore.preValidation** and **hdbCore.request**, which are defined in the "helper methods" section, below. + +```javascript +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/', + method: 'POST', + preValidation: hdbCore.preValidation, + handler: hdbCore.request, + }); +}; +``` + +## Custom Handlers + +For endpoints where you want to execute multiple operations against Harper, or perform additional processing (like an ML classification, or an aggregation, or a call to a 3rd party API), you can define your own logic in the handler. The function below will execute a query against the dogs table, and filter the results to only return those dogs over 4 years in age. + +**IMPORTANT: This route has NO preValidation and uses hdbCore.requestWithoutAuthentication, which- as the name implies- bypasses all user authentication. See the security concerns and mitigations in the "helper methods" section, below.** + +```javascript +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/:id', + method: 'GET', + handler: async (request) => { + request.body = { + operation: 'sql', + sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}` + }; + + const result = await hdbCore.requestWithoutAuthentication(request); + return result.filter((dog) => dog.age > 4); + } + }); +} +``` + +## Custom preValidation Hooks + +The simple example above was just a pass-through to Harper- the exact same result could have been achieved by hitting the core Harper API. But for many applications, you may want to authenticate the user using custom logic you write, or by conferring with a 3rd party service. Custom preValidation hooks let you do just that. 
+ +Below is an example of a route that uses a custom validation hook: + +```javascript +import customValidation from '../helpers/customValidation'; + +export default async (server, { hdbCore, logger }) => { + server.route({ + url: '/:id', + method: 'GET', + preValidation: (request) => customValidation(request, logger), + handler: (request) => { + request.body = { + operation: 'sql', + sql: `SELECT * FROM dev.dog WHERE id = ${request.params.id}`, + }; + + return hdbCore.requestWithoutAuthentication(request); + }, + }); +}; +``` + +Notice we imported customValidation from the **helpers** directory. To include a helper, and to see the actual code within customValidation, see [Helper Methods](#helper-methods). + +## Helper Methods + +When declaring routes, you are given access to 2 helper methods: hdbCore and logger. + +### hdbCore + +hdbCore contains three functions that allow you to authenticate an inbound request, and execute operations against Harper directly, bypassing the standard Operations API. + +#### preValidation + +This is an array of functions used for fastify authentication. The second function takes the authorization header from the inbound request and executes the same authentication as the standard Harper Operations API (for example, `hdbCore.preValidation[1](req, resp, callback)`). It will determine if the user exists, and if they are allowed to perform this operation. **If you use the request method, you have to use preValidation to get the authenticated user**. + +#### request + +This will execute a request with Harper using the operations API. The `request.body` should contain a standard Harper operation and must also include the `hdb_user` property that was in `request.body` provided in the callback. + +#### requestWithoutAuthentication + +Executes a request against Harper without any security checks around whether the inbound user is allowed to make this request. 
For security purposes, you should always take the following precautions when using this method: + +- Properly handle user-submitted values, including url params. User-submitted values should only be used for `search_value` and for defining values in records. Special care should be taken to properly escape any values if user-submitted values are used for SQL. + +### logger + +This helper allows you to write directly to the log file, hdb.log. It's useful for debugging during development, although you may also use the console logger. There are 5 functions contained within logger, each of which pertains to a different **logging.level** configuration in your harperdb-config.yaml file. + +- logger.trace('Starting the handler for /dogs') +- logger.debug('This should only fire once') +- logger.warn('This should never ever fire') +- logger.error('This did not go well') +- logger.fatal('This did not go very well at all') diff --git a/reference/graphql-querying/overview.md b/reference/graphql-querying/overview.md new file mode 100644 index 00000000..917f0e54 --- /dev/null +++ b/reference/graphql-querying/overview.md @@ -0,0 +1,248 @@ +--- +title: GraphQL Querying +--- + + + + + + + +# GraphQL Querying + +Added in: v4.4.0 (provisional) + +Changed in: v4.5.0 (disabled by default, configuration options) + +Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../components/applications.md), and for querying [Resources](../resources/overview.md). + +Get started by setting `graphql: true` in `config.yaml`. This configuration option was added in v4.5.0 to allow more granular control over the GraphQL endpoint. + +This automatically enables a `/graphql` endpoint that can be used for GraphQL queries. 
+ +> Harper's GraphQL component is inspired by the [GraphQL Over HTTP](https://graphql.github.io/graphql-over-http/draft/#) specification; however, it does not fully implement either that specification or the [GraphQL](https://spec.graphql.org/) specification. + +Queries can either be `GET` or `POST` requests, and both follow essentially the same request format. `GET` requests must use search parameters, and `POST` requests use the request body. + +For example, to request the GraphQL Query: + +```graphql +query GetDogs { + Dog { + id + name + } +} +``` + +The `GET` request would look like: + +```http +GET /graphql?query=query+GetDogs+%7B+Dog+%7B+id+name+%7D+%7D +Accept: application/graphql-response+json +``` + +And the `POST` request would look like: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDogs { Dog { id name } }" +} +``` + +> Tip: For the best user experience, include the `Accept: application/graphql-response+json` header in your request. This provides better status codes for errors. + +The Harper GraphQL querying system is strictly limited to exported Harper Resources. This will typically be a table that uses the `@exported` directive in its schema or `export`'ed custom resources. Queries can only specify Harper Resources and their attributes in the selection set. Queries can filter using [arguments](https://graphql.org/learn/queries/#arguments) on the top-level Resource field. Harper provides a short form pattern for simple queries, and a long form pattern based off of the [Resource Query API](../rest/querying.md) for more complex queries. + +Unlike REST queries, GraphQL queries can specify multiple resources simultaneously: + +```graphql +query GetDogsAndOwners { + Dog { + id + name + breed + } + + Owner { + id + name + occupation + } +} +``` + +This will return all dogs and owners in the database. 
It is equivalent to executing two REST queries: + +```http +GET /Dog/?select(id,name,breed) +# and +GET /Owner/?select(id,name,occupation) +``` + +## Request Parameters + +There are three request parameters for GraphQL queries: `query`, `operationName`, and `variables` + +1. `query` - _Required_ - The string representation of the GraphQL document. + 1. Limited to [Executable Definitions](https://spec.graphql.org/October2021/#executabledefinition) only. + 1. i.e. GraphQL [`query`](https://graphql.org/learn/queries/#fields) or `mutation` (coming soon) operations, and [fragments](https://graphql.org/learn/queries/#fragments). + 1. If a shorthand, unnamed, or singular named query is provided, it will be executed by default. Otherwise, if there are multiple queries, the `operationName` parameter must be used. +1. `operationName` - _Optional_ - The name of the query operation to execute if multiple queries are provided in the `query` parameter +1. `variables` - _Optional_ - A map of variable values to be used for the specified query + +## Type Checking + +The Harper GraphQL Querying system is designed to handle GraphQL queries and map them directly to Harper's tables, schemas, fields, and relationships to easily query with GraphQL syntax with minimal configuration, code, and overhead. However, "GraphQL", as a technology, has come to encompass an entire model of resolvers and a type checking system, which is outside of the scope of using GraphQL as a _query_ language for data retrieval from Harper. Therefore, the querying system generally does **not** type check, and type checking behavior is outside the scope of resolving queries and is only loosely defined in Harper. + +In variable definitions, the querying system will ensure non-null values exist (and error appropriately), but it will not do any type checking of the value itself. + +For example, the variable `$name: String!` states that `name` should be a non-null, string value. 
+ +- If the request does not contain the `name` variable, an error will be returned +- If the request provides `null` for the `name` variable, an error will be returned +- If the request provides any non-string value for the `name` variable, i.e. `1`, `true`, `{ foo: "bar" }`, the behavior is undefined and an error may or may not be returned. +- If the variable definition is changed to include a default value, `$name: String! = "John"`, then when omitted, `"John"` will be used. + - If `null` is provided as the variable value, an error will still be returned. + - If the default value does not match the type specified (i.e. `$name: String! = 0`), this is also considered undefined behavior. It may or may not fail in a variety of ways. +- Fragments will generally extend non-specified types, and the querying system will do no validity checking on them. For example, `fragment Fields on Any { ... }` is just as valid as `fragment Fields on MadeUpTypeName { ... }`. See the Fragments sections for more details. + +The only notable place the querying system will do some level of type analysis is the transformation of arguments into a query. + +- Objects will be transformed into properly nested attributes +- Strings and Boolean values are passed through as their AST values +- Float and Int values will be parsed using the JavaScript `parseFloat` and `parseInt` methods respectively. +- List and Enums are not supported. + +## Fragments + +The querying system loosely supports fragments. Both fragment definitions and inline fragments are supported, and are entirely a composition utility. Since this system does very little type checking, the `on Type` part of fragments is entirely pointless. Any value can be used for `Type` and it will have the same effect. 
+ +For example, in the query + +```graphql +query Get { + Dog { + ...DogFields + } +} + +fragment DogFields on Dog { + name + breed +} +``` + +The `Dog` type in the fragment has no correlation to the `Dog` resource in the query (that correlates to the Harper `Dog` resource). + +You can literally specify anything in the fragment and it will behave the same way: + +```graphql +fragment DogFields on Any { ... } # this is recommended +fragment DogFields on Cat { ... } +fragment DogFields on Animal { ... } +fragment DogFields on LiterallyAnything { ... } +``` + +As an actual example, fragments should be used for composition: + +```graphql +query Get { + Dog { + ...sharedFields + breed + } + Owner { + ...sharedFields + occupation + } +} + +fragment sharedFields on Any { + id + name +} +``` + +## Short Form Querying + +Any attribute can be used as an argument for a query. In this short form, multiple arguments are treated as multiple equivalency conditions with the default `and` operation. + +For example, the following query requires an `id` variable to be provided, and the system will search for a `Dog` record matching that id. + +```graphql +query GetDog($id: ID!) { + Dog(id: $id) { + name + breed + owner { + name + } + } +} +``` + +And as a properly formed request: + +```http +POST /graphql/ +Content-Type: application/json +Accept: application/graphql-response+json + +{ + "query": "query GetDog($id: ID!) { Dog(id: $id) { name breed owner {name}}}", + "variables": { + "id": "0" + } +} +``` + +The REST equivalent would be: + +```http +GET /Dog/?id==0&select(name,breed,owner{name}) +# or +GET /Dog/0?select(name,breed,owner{name}) +``` + +Short form queries can handle nested attributes as well. 
+ +For example, return all dogs who have an owner with the name `"John"` + +```graphql +query GetDog { + Dog(owner: { name: "John" }) { + name + breed + owner { + name + } + } +} +``` + +Would be equivalent to + +```http +GET /Dog/?owner.name==John&select(name,breed,owner{name}) +``` + +And finally, we can put all of these together to create semi-complex, equality based queries! + +The following query has two variables and will return all dogs who have the specified name as well as the specified owner name. + +```graphql +query GetDog($dogName: String!, $ownerName: String!) { + Dog(name: $dogName, owner: { name: $ownerName }) { + name + breed + owner { + name + } + } +} +``` diff --git a/reference/http/api.md b/reference/http/api.md new file mode 100644 index 00000000..bde865df --- /dev/null +++ b/reference/http/api.md @@ -0,0 +1,401 @@ +--- +id: api +title: HTTP API +--- + + + + +The `server` global object is available in all Harper component code. It provides access to the HTTP server middleware chain, WebSocket server, authentication, resource registry, and cluster information. + +## `server.http(listener, options)` + +Add a handler to the HTTP request middleware chain. + +```ts +server.http(listener: RequestListener, options?: HttpOptions): HttpServer[] +``` + +Returns an array of `HttpServer` instances based on the `options.port` and `options.securePort` values. + +**Example:** + +```js +server.http( + (request, next) => { + if (request.url === '/graphql') return handleGraphQLRequest(request); + return next(request); + }, + { runFirst: true } +); +``` + +### `RequestListener` + +```ts +type RequestListener = (request: Request, next: RequestListener) => Promise; +``` + +To continue the middleware chain, call `next(request)`. To short-circuit, return a `Response` (or `Response`-like object) directly. 
+ +### `HttpOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | --------------------------------------------- | +| `runFirst` | boolean | `false` | Insert this handler at the front of the chain | +| `port` | number | `http.port` | Target the HTTP server on this port | +| `securePort` | number | `http.securePort` | Target the HTTPS server on this port | + +### `HttpServer` + +A Node.js [`http.Server`](https://nodejs.org/api/http.html#class-httpserver) or [`https.SecureServer`](https://nodejs.org/api/https.html#class-httpsserver) instance. + +--- + +## `Request` + +A `Request` object is passed to HTTP middleware handlers and direct static REST handlers. It follows the [WHATWG `Request`](https://developer.mozilla.org/en-US/docs/Web/API/Request) API with additional Harper-specific properties. + +### Properties + +| Property | Type | Description | +| ---------- | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `url` | string | The request target (path + query string), e.g. `/path?query=string` | +| `method` | string | HTTP method: `GET`, `POST`, `PUT`, `DELETE`, etc. | +| `headers` | [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) | Request headers | +| `pathname` | string | Path portion of the URL, without query string | +| `protocol` | string | `http` or `https` | +| `data` | any | Deserialized body, based on `Content-Type` header | +| `ip` | string | Remote IP address of the client (or last proxy) | +| `host` | string | Host from the request headers | +| `session` | object | Current cookie-based session (a `Table` record instance). Update with `request.session.update({ key: value })`. A cookie is set automatically the first time a session is updated or a login occurs. 
| + +### Methods + +#### `request.login(username, password)` + +```ts +login(username: string, password: string): Promise +``` + +Authenticates the user by username and password. On success, creates a session and sets a cookie on the response. Rejects if authentication fails. + +#### `request.sendEarlyHints(link, headers?)` + +```ts +sendEarlyHints(link: string, headers?: object): void +``` + +Sends an [Early Hints](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/103) (HTTP 103) response before the final response. Useful in cache resolution functions to hint at preloadable resources: + +```javascript +class Origin { + async get(request) { + this.getContext().requestContext.sendEarlyHints(''); + return fetch(request); + } +} +Cache.sourcedFrom(Origin); +``` + +### Low-Level Node.js Access + +:::caution +These properties expose the raw Node.js request/response objects and should be used with caution. Using them can break other middleware handlers that depend on the layered `Request`/`Response` pattern. +::: + +| Property | Description | +| --------------- | ----------------------------------------------------------------------------------------------------- | +| `_nodeRequest` | Underlying [`http.IncomingMessage`](https://nodejs.org/api/http.html#http_class_http_incomingmessage) | +| `_nodeResponse` | Underlying [`http.ServerResponse`](https://nodejs.org/api/http.html#http_class_http_serverresponse) | + +--- + +## `Response` + +REST method handlers can return: + +- **Data directly** — Serialized using Harper's content negotiation +- **A `Response` object** — The WHATWG [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) +- **A `Response`-like object** — A plain object with the following properties: + +| Property | Type | Description | +| --------- | --------------------------------------------------------------------- | ------------------------------------------------- | +| `status` | number | HTTP status code (e.g. 
`200`, `404`) | +| `headers` | [`Headers`](https://developer.mozilla.org/en-US/docs/Web/API/Headers) | Response headers | +| `data` | any | Response data, serialized via content negotiation | +| `body` | Buffer \| string \| ReadableStream \| Blob | Raw response body (alternative to `data`) | + +--- + +## `server.ws(listener, options)` + +Add a handler to the WebSocket connection middleware chain. + +```ts +server.ws(listener: WsListener, options?: WsOptions): HttpServer[] +``` + +**Example:** + +```js +server.ws((ws, request, chainCompletion) => { + chainCompletion.then(() => { + ws.on('message', (data) => console.log('received:', data)); + ws.send('hello'); + }); +}); +``` + +### `WsListener` + +```ts +type WsListener = (ws: WebSocket, request: Request, chainCompletion: Promise, next: WsListener) => Promise; +``` + +| Parameter | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------- | +| `ws` | [`WebSocket`](https://github.com/websockets/ws/blob/main/doc/ws.md#class-websocket) instance | +| `request` | Harper `Request` object from the upgrade event | +| `chainCompletion` | `Promise` that resolves when the HTTP request chain finishes. Await before sending to ensure the HTTP request is handled. | +| `next` | Continue chain: `next(ws, request, chainCompletion)` | + +### `WsOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | ----------------------------------------------- | +| `maxPayload` | number | 100 MB | Maximum WebSocket payload size | +| `runFirst` | boolean | `false` | Insert this handler at the front of the chain | +| `port` | number | `http.port` | Target the WebSocket server on this port | +| `securePort` | number | `http.securePort` | Target the secure WebSocket server on this port | + +--- + +## `server.upgrade(listener, options)` + +Add a handler to the HTTP server `upgrade` event. 
Use this to delegate upgrade events to an external WebSocket server. + +```ts +server.upgrade(listener: UpgradeListener, options?: UpgradeOptions): void +``` + +**Example** (from the Harper Next.js component): + +```js +server.upgrade( + (request, socket, head, next) => { + if (request.url === '/_next/webpack-hmr') { + return upgradeHandler(request, socket, head).then(() => { + request.__harperdb_request_upgraded = true; + next(request, socket, head); + }); + } + return next(request, socket, head); + }, + { runFirst: true } +); +``` + +When `server.ws()` is registered, Harper adds a default upgrade handler. The default handler sets `request.__harperdb_request_upgraded = true` after upgrading, and checks for this flag before upgrading again (so external upgrade handlers can detect whether Harper has already handled the upgrade). + +### `UpgradeListener` + +```ts +type UpgradeListener = (request: IncomingMessage, socket: Socket, head: Buffer, next: UpgradeListener) => void; +``` + +### `UpgradeOptions` + +| Property | Type | Default | Description | +| ------------ | ------- | ----------------- | ------------------------------------ | +| `runFirst` | boolean | `false` | Insert at the front of the chain | +| `port` | number | `http.port` | Target the HTTP server on this port | +| `securePort` | number | `http.securePort` | Target the HTTPS server on this port | + +--- + +## `server.socket(listener, options)` + +Create a raw TCP or TLS socket server. + +```ts +server.socket(listener: ConnectionListener, options: SocketOptions): SocketServer +``` + +Only one socket server is created per call. A `securePort` takes precedence over `port`. + +### `ConnectionListener` + +Node.js connection listener as in [`net.createServer`](https://nodejs.org/api/net.html#netcreateserveroptions-connectionlistener) or [`tls.createServer`](https://nodejs.org/api/tls.html#tlscreateserveroptions-secureconnectionlistener). 
+ +### `SocketOptions` + +| Property | Type | Description | +| ------------ | ------ | -------------------------------------------------------------------------- | +| `port` | number | Port for a [`net.Server`](https://nodejs.org/api/net.html#class-netserver) | +| `securePort` | number | Port for a [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) | + +### `SocketServer` + +A Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`tls.Server`](https://nodejs.org/api/tls.html#class-tlsserver) instance. + +--- + +## `server.authenticateUser(username, password)` + +Added in: v4.5.0 + +```ts +server.authenticateUser(username: string, password: string): Promise +``` + +Returns the user object for the given username after verifying the password. Throws if the password is incorrect. + +Use this when you need to explicitly verify a user's credentials (e.g., in a custom login endpoint). For lookup without password verification, use [`server.getUser()`](#servergetuserusername). + +--- + +## `server.getUser(username)` + +```ts +server.getUser(username: string): Promise +``` + +Returns the user object for the given username without verifying credentials. Use for authorization checks when the user is already authenticated. + +--- + +## `server.resources` + +The central registry of all resources exported for REST, MQTT, and other protocols. 
+ +### `server.resources.set(name, resource, exportTypes?)` + +Register a resource: + +```js +class NewResource extends Resource {} +server.resources.set('NewResource', NewResource); + +// Limit to specific protocols: +server.resources.set('NewResource', NewResource, { rest: true, mqtt: false }); +``` + +### `server.resources.getMatch(path, exportType?)` + +Find a resource matching a path: + +```js +server.resources.getMatch('/NewResource/some-id'); +server.resources.getMatch('/NewResource/some-id', 'rest'); +``` + +--- + +## `server.operation(operation, context?, authorize?)` + +Execute an [Operations API](../operations-api/overview.md) operation programmatically. + +```ts +server.operation(operation: object, context?: { username: string }, authorize?: boolean): Promise +``` + +| Parameter | Type | Description | +| ----------- | ---------------------- | ---------------------------------------------------- | +| `operation` | object | Operations API request body | +| `context` | `{ username: string }` | Optional: execute as this user | +| `authorize` | boolean | Whether to apply authorization. Defaults to `false`. | + +--- + +## `server.recordAnalytics(value, metric, path?, method?, type?)` + +Record a metric into Harper's analytics system. + +```ts +server.recordAnalytics(value: number, metric: string, path?: string, method?: string, type?: string): void +``` + +| Parameter | Description | +| --------- | ---------------------------------------------------------------------------- | +| `value` | Numeric value (e.g. duration in ms, bytes) | +| `metric` | Metric name | +| `path` | Optional URL path for grouping (omit per-record IDs — use the resource name) | +| `method` | Optional HTTP method for grouping | +| `type` | Optional type for grouping | + +Metrics are aggregated and available via the [analytics API](../analytics/overview.md). + +--- + +## `server.config` + +The parsed `harperdb-config.yaml` configuration object. 
Read-only access to Harper's current runtime configuration. + +--- + +## `server.nodes` + +Returns an array of node objects registered in the cluster. + +## `server.shards` + +Returns a map of shard number to an array of associated nodes. + +## `server.hostname` + +Returns the hostname of the current node. + +## `server.contentTypes` + +Returns the `Map` of registered content type handlers. Same as the global [`contentTypes`](#contenttypes) object. + +--- + +## `contentTypes` + +A [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map) of content type handlers for HTTP request/response serialization. Harper uses content negotiation: the `Content-Type` header selects the deserializer for incoming requests, and the `Accept` header selects the serializer for responses. + +### Built-in Content Types + +| MIME type | Description | +| --------------------- | ------------------ | +| `application/json` | JSON | +| `application/cbor` | CBOR | +| `application/msgpack` | MessagePack | +| `text/csv` | CSV | +| `text/event-stream` | Server-Sent Events | + +### Custom Content Type Handlers + +Register or replace a handler by setting it on the `contentTypes` map: + +```js +import { contentTypes } from 'harperdb'; + +contentTypes.set('text/xml', { + serialize(data) { + return '' + serialize(data) + ''; + }, + q: 0.8, // quality: lower = less preferred during content negotiation +}); +``` + +### Handler Interface + +| Property | Type | Description | +| --------------------------- | ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `serialize(data)` | `(any) => Buffer \| Uint8Array \| string` | Serialize data for a response | +| `serializeStream(data)` | `(any) => ReadableStream` | Serialize as a stream (for async iterables or large data) | +| `deserialize(buffer)` | `(Buffer \| string) => any` | Deserialize an 
incoming request body. Used when `deserializeStream` is absent. String for `text/*` types, Buffer for binary types. | +| `deserializeStream(stream)` | `(ReadableStream) => any` | Deserialize an incoming request stream | +| `q` | number (0–1) | Quality indicator for content negotiation. Defaults to `1`. | + +--- + +## Related + +- [HTTP Overview](./overview) +- [HTTP Configuration](./configuration) +- [REST Overview](../rest/overview.md) +- [Security API](../security/api.md) diff --git a/reference/http/configuration.md b/reference/http/configuration.md new file mode 100644 index 00000000..abc21a29 --- /dev/null +++ b/reference/http/configuration.md @@ -0,0 +1,342 @@ +--- +id: configuration +title: HTTP Configuration +--- + + + + + + +The `http` section in `harperdb-config.yaml` controls the built-in HTTP server that serves REST, WebSocket, component, and Operations API traffic. + +Harper must be restarted for configuration changes to take effect. + +## Ports + +### `http.port` + +Type: `integer` + +Default: `9926` + +The port the HTTP server listens on. This is the primary port for REST, WebSocket, MQTT-over-WebSocket, and component traffic. + +### `http.securePort` + +Type: `integer` + +Default: `null` + +The port for HTTPS connections. Requires a valid `tls` section configured with certificate and key. When set, Harper accepts both plaintext (`http.port`) and TLS connections (`http.securePort`) simultaneously. + +## TLS + +TLS is configured in its own top-level `tls` section in `harperdb-config.yaml`, separate from the `http` section. It is shared by the HTTP server (HTTPS), the MQTT broker (secure MQTT), and any TLS socket servers. See [TLS Configuration](./tls) for all options including multi-domain (SNI) certificates and the Operations API override. 
+ +To enable HTTPS, set `http.securePort` and add a `tls` block: + +```yaml +http: + securePort: 9927 + +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## HTTP/2 + +### `http.http2` + +Added in: v4.5.0 + +Type: `boolean` + +Default: `false` + +Enables HTTP/2 for all API endpoints. HTTP/2 requires TLS, so `http.securePort` must also be set. + +```yaml +http: + http2: true + securePort: 9927 +``` + +## Timeouts and Limits + +### `http.headersTimeout` + +Type: `integer` + +Default: `60000` (ms) + +Maximum time in milliseconds the server waits to receive the complete HTTP headers for a request. + +### `http.keepAliveTimeout` + +Type: `integer` + +Default: `30000` (ms) + +Milliseconds of inactivity after which the server closes an idle keep-alive connection. + +### `http.timeout` + +Type: `integer` + +Default: `120000` (ms) + +Maximum time in milliseconds before a request times out. + +### `http.maxHeaderSize` + +Type: `integer` + +Default: `16384` (bytes) + +Maximum allowed size of HTTP request headers. + +### `http.requestQueueLimit` + +Type: `integer` + +Default: `20000` (ms) + +The maximum estimated request queue time in milliseconds. When the queue exceeds this limit, requests are rejected with HTTP 503. + +## Compression + +### `http.compressionThreshold` + +Added in: v4.2.0 + +Type: `number` + +Default: `1200` (bytes) + +For clients that support Brotli encoding (`Accept-Encoding: br`), responses larger than this threshold are compressed. Streaming query responses are always compressed for supporting clients, regardless of this setting (since their size is unknown upfront). + +```yaml +http: + compressionThreshold: 1200 +``` + +## CORS + +### `http.cors` + +Type: `boolean` + +Default: `true` + +Enables Cross-Origin Resource Sharing, allowing requests from different origins.
+ +### `http.corsAccessList` + +Type: `string[]` + +Default: `null` + +An array of allowed origin domains when CORS is enabled. When `null`, all origins are allowed. + +```yaml +http: + cors: true + corsAccessList: + - https://example.com + - https://app.example.com +``` + +### `http.corsAccessControlAllowHeaders` + +Added in: v4.5.0 + +Type: `string` + +Default: `"Accept, Content-Type, Authorization"` + +Comma-separated list of headers allowed in the [`Access-Control-Allow-Headers`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Headers) response header for OPTIONS (preflight) requests. + +## Session Affinity + +### `http.sessionAffinity` + +Added in: v4.1.0 + +Type: `string` + +Default: `null` + +Routes repeated requests from the same client to the same worker thread. This can improve caching locality and provide fairness in request handling. + +Accepted values: + +- `ip` — Route by the remote IP address. Use this when Harper is the public-facing server and each client has a distinct IP. +- `<header-name>` — Route by the value of any HTTP header (e.g., `Authorization`). Use this when Harper is behind a proxy where all requests share the same source IP. + +```yaml +http: + sessionAffinity: ip +``` + +:::caution +If Harper is behind a reverse proxy and you use `ip`, all requests will share the proxy's IP and will be routed to a single thread. Use a header-based value instead. +::: + +## mTLS + +### `http.mtls` + +Added in: v4.3.0 + +Type: `boolean | object` + +Default: `false` + +Enables mutual TLS (mTLS) authentication for HTTP connections. When set to `true`, client certificates are verified against the CA specified in `tls.certificateAuthority`. Authenticated connections use the `CN` (common name) from the certificate subject as the Harper username.
+ +```yaml +http: + mtls: true +``` + +For granular control, specify an object: + +| Property | Type | Default | Description | +| ------------------------- | ----------------- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `user` | string | (CN from cert) | Authenticate all mTLS connections as this specific user. Set to `null` to skip credential-based authentication (requires combining with `required: true`). | +| `required` | boolean | `false` | Reject any connection that does not provide a valid client certificate. | +| `certificateVerification` | boolean \| object | `false` | Enable CRL/OCSP certificate revocation checking. See below. | + +### `http.mtls.certificateVerification` + +Added in: v4.7.0 (OCSP support) + +Type: `boolean | object` + +Default: `false` + +When mTLS is enabled, Harper can verify the revocation status of client certificates using CRL (Certificate Revocation List) and/or OCSP (Online Certificate Status Protocol). Disabled by default; must be explicitly enabled for environments that require certificate revocation checking. + +Set to `true` to enable with all defaults, or configure as an object: + +**Global:** + +- `failureMode` — `'fail-closed'` (default) | `'fail-open'`. Whether to reject or allow connections when revocation checking fails. 
+ +**CRL** (enabled by default when `certificateVerification` is enabled): + +- `crl.enabled` — boolean, default `true` +- `crl.timeout` — ms to wait for CRL download, default `10000` +- `crl.cacheTtl` — ms to cache CRL, default `86400000` (24h) +- `crl.gracePeriod` — ms grace period after CRL `nextUpdate`, default `86400000` (24h) +- `crl.failureMode` — CRL-specific failure mode + +**OCSP** (enabled by default as CRL fallback): + +- `ocsp.enabled` — boolean, default `true` +- `ocsp.timeout` — ms to wait for OCSP response, default `5000` +- `ocsp.cacheTtl` — ms to cache successful responses, default `3600000` (1h) +- `ocsp.errorCacheTtl` — ms to cache errors, default `300000` (5m) +- `ocsp.failureMode` — OCSP-specific failure mode + +Harper uses a CRL-first strategy with OCSP fallback. If both fail, the configured `failureMode` is applied. + +**Examples:** + +```yaml +# Basic mTLS, no revocation checking +http: + mtls: true + +# mTLS with revocation checking (recommended for production) +http: + mtls: + certificateVerification: true + +# Require mTLS for all connections + revocation checking +http: + mtls: + required: true + certificateVerification: true + +# Custom verification settings +http: + mtls: + certificateVerification: + failureMode: fail-closed + crl: + timeout: 15000 + cacheTtl: 43200000 + ocsp: + timeout: 8000 + cacheTtl: 7200000 +``` + +## Logging + +HTTP request logging is disabled by default. Enabling the `http.logging` block turns on request logging. 
+ +### `http.logging` + +Added in: v4.6.0 + +Type: `object` + +Default: disabled + +```yaml +http: + logging: + level: info # info = all requests, warn = 4xx+, error = 5xx + path: ~/hdb/log/http.log + timing: true # log request timing + headers: false # log request headers (verbose) + id: true # assign and log a unique request ID +``` + +The `level` controls which requests are logged: + +- `info` (or more verbose) — All HTTP requests +- `warn` — Requests with status 400 or above +- `error` — Requests with status 500 or above + +## Complete Example + +```yaml +http: + port: 9926 + securePort: 9927 + http2: true + cors: true + corsAccessList: + - null + compressionThreshold: 1200 + headersTimeout: 60000 + keepAliveTimeout: 30000 + timeout: 120000 + maxHeaderSize: 16384 + requestQueueLimit: 20000 + sessionAffinity: null + mtls: false + logging: + level: warn + path: ~/hdb/log/http.log + timing: true + +# tls is a top-level section — see TLS Configuration +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## Related + +- [HTTP Overview](./overview) +- [HTTP API](./api) +- [TLS Configuration](./tls) +- [Security Overview](../security/overview.md) +- [Configuration Overview](../configuration/overview.md) diff --git a/reference/http/overview.md b/reference/http/overview.md new file mode 100644 index 00000000..06858a8c --- /dev/null +++ b/reference/http/overview.md @@ -0,0 +1,64 @@ +--- +id: overview +title: HTTP Server +--- + + + + + + +Harper includes a built-in HTTP server that serves as the primary interface for REST, WebSocket, MQTT-over-WebSocket, and component-defined endpoints. The same server handles all application traffic on a configurable port (default `9926`). + +## Architecture + +Harper's HTTP server is multi-threaded. 
Each thread runs an independent copy of the HTTP stack, and incoming connections are distributed across threads using `SO_REUSEPORT` socket sharing — the most performant mechanism available for multi-threaded socket handling. + +Added in: v4.1.0 (worker threads for HTTP requests) + +Changed in: v4.2.0 (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) + +In previous versions: Session-affinity based socket delegation was used to route requests. This has been deprecated in favor of `SO_REUSEPORT`. + +## Request Handling + +Harper uses a layered middleware chain for HTTP request processing. Components and applications can add handlers to this chain using the [`server.http()`](./api#serverhttplistener-options) API. Handlers are called in order; each handler can either process the request and return a `Response`, or pass it along to the next handler with `next(request)`. + +Request and response objects follow the [WHATWG Fetch API](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) conventions (`Request` and `Response` classes), providing good composability for layered middleware and clean mapping to REST resource handlers. + +## Protocols Served + +The HTTP server handles multiple protocols on the same port: + +- **REST** — CRUD operations on Harper resources via standard HTTP methods +- **WebSockets** — Real-time bidirectional connections (via `server.ws()`) +- **MQTT over WebSocket** — MQTT clients connecting over WebSocket (sub-protocol `mqtt`) +- **Server-Sent Events** — Streaming updates to browser clients +- **Operations API** — Management API (configurable to share or use separate port) + +## TLS / HTTPS + +HTTPS support is enabled by setting `http.securePort` in `harperdb-config.yaml` and configuring the `tls` section with a certificate and private key. The same `tls` configuration is shared by HTTPS and MQTT secure connections. 
+ +See [Configuration](./configuration) for TLS options and [Security](../security/overview.md) for certificate management details. + +## HTTP/2 + +Added in: v4.5.0 + +HTTP/2 can be enabled with the `http2: true` option in `harperdb-config.yaml`. When enabled, HTTP/2 applies to all API endpoints served on `http.securePort` (HTTP/2 requires TLS). + +## Compression + +Harper automatically compresses HTTP responses using Brotli for clients that advertise `Accept-Encoding: br`. Compression applies when the response body exceeds the configured `compressionThreshold` (default 1200 bytes). Streaming query responses are always compressed for clients that support it (since their size is not known upfront). + +## Logging + +HTTP request logging is not enabled by default. To enable it, add an `http.logging` block to your configuration. See [Configuration](./configuration#logging) for details. + +## Related + +- [HTTP Configuration](./configuration) +- [HTTP API](./api) +- [REST Overview](../rest/overview.md) +- [Security Overview](../security/overview.md) diff --git a/reference/http/tls.md b/reference/http/tls.md new file mode 100644 index 00000000..7f9ff399 --- /dev/null +++ b/reference/http/tls.md @@ -0,0 +1,119 @@ +--- +id: tls +title: TLS Configuration +--- + + + + +Harper uses a top-level `tls` section in `harperdb-config.yaml` to configure Transport Layer Security. This configuration is shared by the HTTP server (HTTPS), the MQTT broker (secure MQTT), and any TLS socket servers created via the [HTTP API](./api#serversocketlistener-options). + +The `operationsApi` section can optionally define its own `tls` block, which overrides the root `tls` for Operations API traffic only. See the [Operations API Configuration](../configuration/operations.md) for more details. + +Harper must be restarted for TLS configuration changes to take effect. 
+ +## TLS Configuration + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +### `tls.certificate` + +Type: `string` + +Default: `"<ROOTPATH>/keys/certificate.pem"` + +Path to the PEM-encoded certificate file. + +### `tls.certificateAuthority` + +Type: `string` + +Default: `"<ROOTPATH>/keys/ca.pem"` + +Path to the PEM-encoded certificate authority (CA) file. Used to verify client certificates when mTLS is enabled. + +### `tls.privateKey` + +Type: `string` + +Default: `"<ROOTPATH>/keys/privateKey.pem"` + +Path to the PEM-encoded private key file. + +### `tls.host` + +Type: `string | undefined` + +The domain name this certificate entry applies to, used for SNI (Server Name Indication) matching. Only relevant when `tls` is defined as an array. When omitted, the certificate's common name (CN) is used as the host name. + +### `tls.ciphers` + +Type: `string | undefined` + +Default: `crypto.defaultCipherList` + +Colon-separated list of allowed TLS cipher suites. When omitted, Node.js [default ciphers](https://nodejs.org/api/crypto.html#nodejs-crypto-constants) are used. See Node.js [Modifying the default TLS cipher suite](https://nodejs.org/api/tls.html#modifying-the-default-tls-cipher-suite) for more information. + +## Enabling HTTPS + +To enable HTTPS, set `http.securePort` in addition to the `tls` section: + +```yaml +http: + securePort: 9927 + +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +When `http.securePort` is set, Harper accepts plaintext connections on `http.port` and TLS connections on `http.securePort` simultaneously. + +## Multi-Domain Certificates (SNI) + +To serve different certificates for different domains using Server Name Indication (SNI), define `tls` as an array of configuration objects. Each entry can optionally include a `host` property specifying which domain it applies to.
If `host` is omitted, the certificate's common name and subject alternate names (SANs) are used. + +```yaml +tls: + - certificate: ~/hdb/keys/certificate1.pem + certificateAuthority: ~/hdb/keys/ca1.pem + privateKey: ~/hdb/keys/privateKey1.pem + host: example.com + - certificate: ~/hdb/keys/certificate2.pem + certificateAuthority: ~/hdb/keys/ca2.pem + privateKey: ~/hdb/keys/privateKey2.pem + # host omitted: certificate's CN is used +``` + +## Operations API Override + +The `operationsApi` section can define its own `tls` block to use a separate certificate for the Operations API: + +```yaml +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem + +operationsApi: + network: + securePort: 9924 + tls: + certificate: ~/hdb/keys/ops-certificate.pem + certificateAuthority: ~/hdb/keys/ops-ca.pem + privateKey: ~/hdb/keys/ops-privateKey.pem +``` + +See the [Operations API Configuration](../configuration/operations.md) for more details. + +## Related + +- [HTTP Configuration](./configuration) — `http.securePort`, `http.http2`, `http.mtls` +- [HTTP Overview](./overview) +- [Security mTLS Authentication](../security/mtls-authentication.md) diff --git a/reference/legacy/cloud.md b/reference/legacy/cloud.md new file mode 100644 index 00000000..6535e8c5 --- /dev/null +++ b/reference/legacy/cloud.md @@ -0,0 +1,11 @@ +--- +title: Harper Cloud +--- + +Harper Cloud (also sometimes referred to as Harper Studio) was Harper's original PaaS offering. +It has been fully replaced by [Harper Fabric](https://fabric.harper.fast). +All users are encouraged to migrate or get started using Harper Fabric immediately. + +[Local Studio](../studio/overview.md) is still an available feature, and now uses the same client as Harper Fabric. + +Reach out to [support@harperdb.io](mailto:support@harperdb.io) or join our community [Discord](https://harper.fast/discord) if you have questions. 
diff --git a/reference/legacy/custom-functions.md b/reference/legacy/custom-functions.md new file mode 100644 index 00000000..167c53f8 --- /dev/null +++ b/reference/legacy/custom-functions.md @@ -0,0 +1,13 @@ +--- +title: Custom Functions +--- + + + +Custom Functions were Harper's original mechanism for adding custom API endpoints and application logic to a Harper instance. They allowed developers to define Fastify-based HTTP routes that ran inside Harper with direct access to the database, and could be deployed across instances via Studio. + +Custom Functions were superseded by the [Components](../components/overview.md) system introduced in v4.2.0. Components provide the same capabilities with a more robust architecture, better tooling, and support for extensions and plugins. + +All users are encouraged to migrate Custom Functions to Components. See the [Components](../components/overview.md) documentation for the modern approach. + +Reach out to [support@harperdb.io](mailto:support@harperdb.io) or join our community [Discord](https://harper.fast/discord) if you have questions. diff --git a/reference/logging/api.md b/reference/logging/api.md new file mode 100644 index 00000000..68083591 --- /dev/null +++ b/reference/logging/api.md @@ -0,0 +1,153 @@ +--- +id: api +title: Logging API +--- + + + + + +## `logger` + +The `logger` global is available in all JavaScript components without any imports. It writes structured log entries to the standard Harper log file (`hdb.log`) at the configured `logging.external` level and path. See [Logging Configuration](./configuration#loggingexternal) for per-component log configuration. + +The `logger` global is a `MainLogger`. Calling `logger.withTag(tag)` returns a `TaggedLogger` scoped to that tag. + +### `MainLogger` + +`MainLogger` always has all log-level methods defined. It also exposes `withTag()` to create a `TaggedLogger`. 
+ +```typescript +interface MainLogger { + trace(...messages: any[]): void; + debug(...messages: any[]): void; + info(...messages: any[]): void; + warn(...messages: any[]): void; + error(...messages: any[]): void; + fatal(...messages: any[]): void; + notify(...messages: any[]): void; + withTag(tag: string): TaggedLogger; +} +``` + +Each method corresponds to a log level. Only entries at or above the configured `logging.level` (or `logging.external.level`) are written. See [Log Levels](./overview#log-levels) for the full hierarchy. + +### `TaggedLogger` + +`TaggedLogger` is returned by `logger.withTag(tag)`. It prefixes every log entry with the given tag, making it easy to filter log output by component or context. + +Because `TaggedLogger` is bound to the configured log level at creation time, methods for levels that are currently disabled are `null`. Always use optional chaining (`?.`) when calling methods on a `TaggedLogger`. + +```typescript +interface TaggedLogger { + trace: ((...messages: any[]) => void) | null; + debug: ((...messages: any[]) => void) | null; + info: ((...messages: any[]) => void) | null; + warn: ((...messages: any[]) => void) | null; + error: ((...messages: any[]) => void) | null; + fatal: ((...messages: any[]) => void) | null; + notify: ((...messages: any[]) => void) | null; +} +``` + +`TaggedLogger` does not have a `withTag()` method. + +### Usage + +#### Basic logging with `logger` + +```javascript +export class MyResource extends Resource { + async get(id) { + logger.debug('Fetching record', { id }); + const record = await super.get(id); + if (!record) { + logger.warn('Record not found', { id }); + } + return record; + } + + async put(record) { + logger.info('Updating record', { id: record.id }); + try { + return await super.put(record); + } catch (err) { + logger.error('Failed to update record', err); + throw err; + } + } +} +``` + +#### Tagged logging with `withTag()` + +Create a tagged logger once per module or class and reuse it. 
Always use `?.` when calling methods since a given level may be `null` if it is below the configured log level. + +```javascript +const log = logger.withTag('my-resource'); + +export class MyResource extends Resource { + async get(id) { + log.debug?.('Fetching record', { id }); + const record = await super.get(id); + if (!record) { + log.warn?.('Record not found', { id }); + } + return record; + } + + async put(record) { + log.info?.('Updating record', { id: record.id }); + try { + return await super.put(record); + } catch (err) { + log.error?.('Failed to update record', err); + throw err; + } + } +} +``` + +Tagged entries appear in the log with the tag included in the entry header: + +``` +2023-03-09T14:25:05.269Z [info] [my-resource]: Updating record +``` + +### Log Entry Format + +Entries written via `logger` appear in `hdb.log` with the standard format: + +``` +<timestamp> [<level>] [<thread>/<thread-id>]: <message> +``` + +Entries written via a `TaggedLogger` include the tag: + +``` +<timestamp> [<level>] [<tag>]: <message> +``` + +For external components, the thread context is set automatically based on which worker thread executes the code. + + + +## Related + +- [Logging Overview](./overview) +- [Logging Configuration](./configuration) +- [Logging Operations](./operations) diff --git a/reference/logging/configuration.md b/reference/logging/configuration.md new file mode 100644 index 00000000..d32b0f28 --- /dev/null +++ b/reference/logging/configuration.md @@ -0,0 +1,370 @@ +--- +id: configuration +title: Logging Configuration +--- + + + + + + +The `logging` section in `harperdb-config.yaml` controls standard log output. Many logging settings are applied dynamically without a restart (added in v4.6.0). + +## Main Logger + +### `logging.level` + +Type: `string` + +Default: `warn` + +Controls the verbosity of logs. Levels from least to most severe: `trace`, `debug`, `info`, `warn`, `error`, `fatal`, `notify`. Setting a level includes that level and all more-severe levels.
+ +```yaml +logging: + level: warn +``` + +For example, `level: warn` results in `warn`, `error`, `fatal`, and `notify` logs. + +### `logging.path` + +Type: `string` + +Default: `<ROOTPATH>/log/hdb.log` + +Full file path for the log file. + +```yaml +logging: + path: ~/hdb/log/hdb.log +``` + +### `logging.root` + +Type: `string` + +Default: `<ROOTPATH>/log` + +Directory path where log files are written. Use `path` to specify the full filename; use `root` to specify only the directory (Harper determines the filename). + +```yaml +logging: + root: ~/hdb/log +``` + +### `logging.file` + +Type: `boolean` + +Default: `true` + +Whether to write logs to a file. Disable if you want to use only standard streams. + +```yaml +logging: + file: true +``` + +### `logging.stdStreams` + +Type: `boolean` + +Default: `false` + +Log to `stdout`/`stderr` in addition to (or instead of) the log file. + +When enabled, run Harper in the foreground (`harper`, not `harper start`). + +```yaml +logging: + stdStreams: true +``` + +### `logging.console` + +Type: `boolean` + +Default: `false` + +Controls whether `console.log` and other `console.*` calls (and anything writing to `process.stdout`/`process.stderr` from JS components) are captured to the log file. + +```yaml +logging: + console: true +``` + +### `logging.auditLog` + +Type: `boolean` + +Default: `false` + +Enables audit (table transaction) logging. When enabled, Harper records every insert, update, and delete to a corresponding audit table. Audit log data is accessed via the `read_audit_log` operation. + +See [Database / Transaction Logging](../database/transaction.md) for details on using audit logs. + +```yaml +logging: + auditLog: false +``` + +### `logging.auditRetention` + +Type: `string | number` + +Default: `3d` + +How long audit log entries are retained before automatic eviction. Accepts duration strings (e.g., `3d`, `12h`) or milliseconds.
+ +```yaml +logging: + auditRetention: 3d +``` + +## Log Rotation + +Rotation provides systematic management of the `hdb.log` file — compressing, archiving, and replacing it on a schedule or size threshold. Rotation is triggered when either `interval` or `maxSize` is set. + +> `interval` and `maxSize` are approximates only. The log file may exceed these values slightly before rotation occurs. + +### `logging.rotation.enabled` + +Type: `boolean` + +Default: `true` + +Enables log rotation. Rotation only activates when `interval` or `maxSize` is also set. + +### `logging.rotation.compress` + +Type: `boolean` + +Default: `false` + +Compress rotated log files with gzip. + +### `logging.rotation.interval` + +Type: `string` + +Default: `null` + +Time between rotations. Accepted units: `D` (days), `H` (hours), `M` (minutes). Example: `1D`, `12H`. + +### `logging.rotation.maxSize` + +Type: `string` + +Default: `null` + +Maximum log file size before rotation. Accepted units: `K` (kilobytes), `M` (megabytes), `G` (gigabytes). Example: `100M`, `1G`. + +### `logging.rotation.path` + +Type: `string` + +Default: `/log` + +Directory for storing rotated log files. Rotated files are named: `HDB-YYYY-MM-DDT-HH-MM-SSSZ.log`. + +```yaml +logging: + rotation: + enabled: true + compress: false + interval: 1D + maxSize: 100M + path: ~/hdb/log +``` + +## Authentication Logging + +### `logging.auditAuthEvents.logFailed` + +Added in: v4.2.0 + +Type: `boolean` + +Default: `false` + +Log all failed authentication attempts. + +Example log entry: + +``` +[error] [auth-event]: {"username":"admin","status":"failure","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"} +``` + +### `logging.auditAuthEvents.logSuccessful` + +Added in: v4.2.0 + +Type: `boolean` + +Default: `false` + +Log all successful authentication events. 
+ +Example log entry: + +``` +[notify] [auth-event]: {"username":"admin","status":"success","type":"authentication","originating_ip":"127.0.0.1","request_method":"POST","path":"/","auth_strategy":"Basic"} +``` + +```yaml +logging: + auditAuthEvents: + logFailed: false + logSuccessful: false +``` + +## Per-Component Logging + +Added in: v4.6.0 + +Harper supports independent logging configurations for different components. Each component logger can have its own `path`, `root`, `level`, `tag`, and `stdStreams` settings. All components default to the main `logging` configuration unless overridden. + +### `logging.external` + +Logging configuration for all external components that use the [`logger` API](./api). + +```yaml +logging: + external: + level: warn + path: ~/hdb/log/apps.log +``` + +### `http.logging` + +HTTP request logging. Disabled by default — defining this section enables it. + +```yaml +http: + logging: + level: info # info = all requests, warn = 4xx+, error = 5xx + path: ~/hdb/log/http.log + timing: true # log request duration + headers: false # log request headers (verbose) + id: true # assign and log a unique request ID per request +``` + +See [HTTP Configuration](../http/configuration.md) for full details. + +### `mqtt.logging` + +MQTT logging configuration. Accepts standard logging options. + +```yaml +mqtt: + logging: + level: warn + path: ~/hdb/log/mqtt.log + stdStreams: false +``` + +### `authentication.logging` + +Authentication subsystem logging. Accepts standard logging options. + +```yaml +authentication: + logging: + level: warn + path: ~/hdb/log/auth.log +``` + +### `replication.logging` + +Replication subsystem logging. Accepts standard logging options. + +```yaml +replication: + logging: + level: warn + path: ~/hdb/log/replication.log +``` + +### `tls.logging` + +TLS subsystem logging. Accepts standard logging options. 
+ +```yaml +tls: + logging: + level: warn + path: ~/hdb/log/tls.log +``` + +### `storage.logging` + +Database storage subsystem logging. Accepts standard logging options. + +```yaml +storage: + logging: + level: warn + path: ~/hdb/log/storage.log +``` + +### `analytics.logging` + +Analytics subsystem logging. Accepts standard logging options. + +```yaml +analytics: + logging: + level: warn + path: ~/hdb/log/analytics.log +``` + +## Clustering Log Level + +Clustering has a separate log level due to its verbosity. Configure with `clustering.logLevel`. + +Valid levels from least verbose: `error`, `warn`, `info`, `debug`, `trace`. + +```yaml +clustering: + logLevel: warn +``` + +## Complete Example + +```yaml +logging: + level: warn + path: ~/hdb/log/hdb.log + file: true + stdStreams: false + console: false + auditLog: false + auditRetention: 3d + rotation: + enabled: true + compress: false + interval: 1D + maxSize: 100M + path: ~/hdb/log + auditAuthEvents: + logFailed: false + logSuccessful: false + external: + level: warn + path: ~/hdb/log/apps.log + +http: + logging: + level: warn + path: ~/hdb/log/http.log + timing: true +``` + +## Related + +- [Logging Overview](./overview) +- [Logging API](./api) +- [Logging Operations](./operations) +- [Database / Transaction Logging](../database/transaction.md) +- [Configuration Overview](../configuration/overview.md) diff --git a/reference/logging/operations.md b/reference/logging/operations.md new file mode 100644 index 00000000..de149d62 --- /dev/null +++ b/reference/logging/operations.md @@ -0,0 +1,91 @@ +--- +id: operations +title: Logging Operations +--- + + + + +Operations for reading the standard Harper log (`hdb.log`). All operations are restricted to `super_user` roles only. + +> Audit log and transaction log operations (`read_audit_log`, `read_transaction_log`, `delete_audit_logs_before`, `delete_transaction_logs_before`) are documented in [Database / Transaction Logging](../database/transaction.md). 
+ +--- + +## `read_log` + +Returns log entries from the primary Harper log (`hdb.log`) matching the provided criteria. + +_Restricted to super_user roles only._ + +### Parameters + +| Parameter | Required | Type | Description | +| ----------- | -------- | ------ | ------------------------------------------------------------------------------------------------------ | +| `operation` | Yes | string | Must be `"read_log"` | +| `start` | No | number | Result offset to start from. Default: `0` (first entry in `hdb.log`). | +| `limit` | No | number | Maximum number of entries to return. Default: `1000`. | +| `level` | No | string | Filter by log level. One of: `notify`, `error`, `warn`, `info`, `debug`, `trace`. Default: all levels. | +| `from` | No | string | Start of time window. Format: `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default: first entry in log. | +| `until` | No | string | End of time window. Format: `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`. Default: last entry in log. | +| `order` | No | string | Sort order: `asc` or `desc` by timestamp. Default: maintains `hdb.log` order. | +| `filter` | No | string | A substring that must appear in each returned log line. 
| + +### Request + +```json +{ + "operation": "read_log", + "start": 0, + "limit": 1000, + "level": "error", + "from": "2021-01-25T22:05:27.464+0000", + "until": "2021-01-25T23:05:27.464+0000", + "order": "desc" +} +``` + +### Response + +```json +[ + { + "level": "notify", + "message": "Connected to cluster server.", + "timestamp": "2021-01-25T23:03:20.710Z", + "thread": "main/0", + "tags": [] + }, + { + "level": "warn", + "message": "Login failed", + "timestamp": "2021-01-25T22:24:45.113Z", + "thread": "http/9", + "tags": [] + }, + { + "level": "error", + "message": "unknown attribute 'name and breed'", + "timestamp": "2021-01-25T22:23:24.167Z", + "thread": "http/9", + "tags": [] + } +] +``` + +### Response Fields + +| Field | Type | Description | +| ----------- | ------ | ------------------------------------------------------------------------------------------------------- | +| `level` | string | Log level of the entry. | +| `message` | string | Log message. | +| `timestamp` | string | ISO 8601 timestamp when the event occurred. | +| `thread` | string | Thread name and ID (e.g., `main/0`, `http/3`). | +| `tags` | array | Additional context tags. Entries from components may include `custom-function` or other component tags. | + +## Related + +- [Logging Overview](./overview) +- [Logging Configuration](./configuration) +- [Database / Transaction Logging](../database/transaction.md) +- [Operations API Overview](../operations-api/overview.md) diff --git a/reference/logging/overview.md b/reference/logging/overview.md new file mode 100644 index 00000000..862aa01c --- /dev/null +++ b/reference/logging/overview.md @@ -0,0 +1,92 @@ +--- +id: overview +title: Logging +--- + + + + + + +Harper's core logging system is used for diagnostics, monitoring, and observability. It has an extensive configuration system, and even supports feature-specific (per-component) configurations in latest versions. 
Furthermore, the `logger` global API is available for creating custom logs from any JavaScript application or plugin code. + +> If you are looking for information on Harper's Audit and Transaction logging system, refer to the [Database](../database/transaction.md) section. + +## Log File + +Changed in: v4.1.0 — All logs consolidated into a single `hdb.log` file + +All standard log output is written to `/log/hdb.log` (default: `~/hdb/log/hdb.log`). + +## Log Entry Format + +Each log entry follows this structure: + +``` + [/] [] ...[]: +``` + +Example: + +``` +2023-03-09T14:25:05.269Z [main/0] [notify]: HarperDB successfully started. +``` + +Fields: + +| Field | Description | +| ----------- | -------------------------------------------------------------------------------------------- | +| `timestamp` | ISO 8601 date/time when the event occurred. | +| `level` | Severity level. See [Log Levels](#log-levels) below. | +| `thread/id` | Name and ID of the thread that produced the log entry (generally, `main`, `http`, or `job`). | +| `tags` | Additional context tags (e.g., `custom-function`, `auth-event`). Most entries have no tags. | +| `message` | The log message. | + +### Log Levels + +From least to most severe (most verbose to least verbose): + +| Level | Description | +| -------- | --------------------------------------------------------------------------------------------- | +| `trace` | Highly detailed internal execution tracing. | +| `debug` | Diagnostic information useful during development. | +| `info` | General operational events. | +| `warn` | Potential issues that don't prevent normal operation. | +| `error` | Errors that affect specific operations. | +| `fatal` | Critical errors causing process termination. | +| `notify` | Important operational milestones (e.g., "server started"). Always logged regardless of level. | + +The default log level is `warn`. Setting a level includes that level and all more-severe levels. 
For example, `warn` logs `warn`, `error`, `fatal`, and `notify`. + +## Standard Streams + +Changed in: v4.6.0 + +By default, logs are written only to the log file. To also log to `stdout`/`stderr`, set [`logging.stdStreams: true`](./configuration.md#loggingstdstreams) (this is automatically enabled by the `DEFAULT_MODE=dev` configuration during installation). + +When logging to standard streams, run Harper in the foreground (i.e. `harper`, not `harper start`). + +As of v4.6.0, logging to standard streams does **not** include timestamps, and console logging (`console.log`, etc.) does not get forwarded to log files unless the [`logging.console: true`](./configuration.md#loggingconsole) option is enabled. + +## Logger API + +JavaScript components can use the `logger` global to write structured log entries: + +```javascript +logger.trace('detailed trace message'); +logger.debug('debug info', { someContext: 'value' }); +logger.info('informational message'); +logger.warn('potential issue'); +logger.error('error occurred', error); +logger.fatal('fatal error'); +logger.notify('server is ready'); +``` + +The `logger` global provides `trace`, `debug`, `info`, `warn`, `error`, `fatal`, and `notify` methods. The logger is based on the Node.js Console API. See [Logging API](./api) for full details. + +## Related + +- [Logging Configuration](./configuration) +- [Logging API](./api) +- [Logging Operations](./operations) +- [Database / Transaction Logging](../database/transaction.md) diff --git a/reference/mqtt/configuration.md b/reference/mqtt/configuration.md new file mode 100644 index 00000000..93205ee9 --- /dev/null +++ b/reference/mqtt/configuration.md @@ -0,0 +1,231 @@ +--- +id: configuration +title: MQTT Configuration +--- + + + + + + + +The `mqtt` section in `harperdb-config.yaml` controls Harper's built-in MQTT broker. MQTT is enabled by default. + +Harper must be restarted for configuration changes to take effect. 
+ +## Minimal Example + +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + webSocket: true + requireAuthentication: true +``` + +## Ports + +### `mqtt.network.port` + +Type: `integer` + +Default: `1883` + +The port for plaintext (non-TLS) MQTT connections. + +### `mqtt.network.securePort` + +Type: `integer` + +Default: `8883` + +The port for secure MQTT connections (MQTTS). Uses the `tls` configuration for certificates. See [TLS Configuration](../http/tls.md) for certificate setup. + +## WebSocket + +### `mqtt.webSocket` + +Type: `boolean` + +Default: `true` + +Enables MQTT over WebSockets. When enabled, Harper handles WebSocket connections on the HTTP port (default `9926`) that specify the `mqtt` sub-protocol (`Sec-WebSocket-Protocol: mqtt`). This is required by the MQTT specification and should be set by any conformant MQTT-over-WebSocket client. + +```yaml +mqtt: + webSocket: true +``` + +## Authentication + +### `mqtt.requireAuthentication` + +Type: `boolean` + +Default: `true` + +Controls whether credentials are required to establish an MQTT connection. When `true`, clients must authenticate with either a username/password or a valid mTLS client certificate. + +When set to `false`, unauthenticated connections are allowed. Unauthenticated clients are still subject to authorization on each publish and subscribe operation — by default, tables and resources do not grant access to unauthenticated users, but this can be configured at the resource level. + +```yaml +mqtt: + requireAuthentication: true +``` + +## mTLS + +### `mqtt.network.mtls` + +Added in: v4.3.0 + +Type: `boolean | object` + +Default: `false` + +Enables mutual TLS (mTLS) authentication for MQTT connections. When set to `true`, client certificates are verified against the CA specified in the root `tls.certificateAuthority` section. Authenticated connections use the `CN` (common name) from the client certificate's subject as the Harper username by default. 
+ +```yaml +mqtt: + network: + mtls: true +``` + +For granular control, specify an object with the following optional properties: + +### `mqtt.network.mtls.user` + +Type: `string | null` + +Default: Common Name from client certificate + +Specifies a fixed username to authenticate all mTLS connections as. When set, any connection that passes certificate verification authenticates as this user regardless of the certificate's CN. + +Setting to `null` disables credential-based authentication for mTLS connections. When combined with `required: true`, this enforces that clients must have a valid certificate AND provide separate credential-based authentication. + +### `mqtt.network.mtls.required` + +Type: `boolean` + +Default: `false` + +When `true`, all incoming MQTT connections must provide a valid client certificate. Connections without a valid certificate are rejected. By default, clients can authenticate with either mTLS or standard username/password credentials. + +### `mqtt.network.mtls.certificateAuthority` + +Type: `string` + +Default: Path from `tls.certificateAuthority` + +Path to the certificate authority (CA) file used to verify MQTT client certificates. By default, uses the CA configured in the root `tls` section. Set this if MQTT clients should be verified against a different CA than the one used for HTTP/TLS. + +### `mqtt.network.mtls.certificateVerification` + +Type: `boolean | object` + +Default: `true` + +When mTLS is enabled, Harper verifies the revocation status of client certificates using OCSP (Online Certificate Status Protocol). This ensures revoked certificates cannot be used for authentication. 
+ +Set to `false` to disable revocation checking, or configure as an object: + +| Property | Type | Default | Description | +| ------------- | ------- | ------------- | ------------------------------------------------------------------------------------------------------ | +| `timeout` | integer | `5000` | Maximum milliseconds to wait for an OCSP response. | +| `cacheTtl` | integer | `3600000` | Milliseconds to cache successful verification results (default 1h). | +| `failureMode` | string | `'fail-open'` | Behavior when OCSP verification fails: `'fail-open'` (allow, log warning) or `'fail-closed'` (reject). | + +```yaml +mqtt: + network: + mtls: + required: true + certificateVerification: + failureMode: fail-closed + timeout: 5000 + cacheTtl: 3600000 +``` + +## mTLS Examples + +```yaml +# Require client certificate + standard credentials (combined auth) +mqtt: + network: + mtls: + user: null + required: true + +# Authenticate all mTLS connections as a fixed user +mqtt: + network: + mtls: + user: mqtt-service-account + required: true + +# mTLS optional — clients can use mTLS or credentials +mqtt: + network: + mtls: true +``` + +## Logging + +### `mqtt.logging` + +Type: `object` + +Default: disabled + +Configures logging for MQTT activity. Accepts the standard logging configuration options. + +```yaml +mqtt: + logging: + path: ~/hdb/log/mqtt.log + level: warn + stdStreams: false +``` + +| Option | Description | +| ------------ | ------------------------------------------------- | +| `path` | File path for the MQTT log output. | +| `root` | Alternative to `path` — sets the log directory. | +| `level` | Log level: `error`, `warn`, `info`, `debug`, etc. | +| `tag` | Custom tag to prefix log entries. | +| `stdStreams` | When `true`, also logs to stdout/stderr. 
| + +## Complete Example + +```yaml +mqtt: + network: + port: 1883 + securePort: 8883 + mtls: + required: false + certificateAuthority: ~/hdb/keys/ca.pem + certificateVerification: + failureMode: fail-open + timeout: 5000 + cacheTtl: 3600000 + webSocket: true + requireAuthentication: true + logging: + level: warn + path: ~/hdb/log/mqtt.log + +# TLS is a top-level section, shared with HTTP +tls: + certificate: ~/hdb/keys/certificate.pem + certificateAuthority: ~/hdb/keys/ca.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +## Related + +- [MQTT Overview](./overview) +- [TLS Configuration](../http/tls.md) +- [Security Overview](../security/overview.md) +- [Configuration Overview](../configuration/overview.md) diff --git a/reference/mqtt/overview.md b/reference/mqtt/overview.md new file mode 100644 index 00000000..3da53187 --- /dev/null +++ b/reference/mqtt/overview.md @@ -0,0 +1,142 @@ +--- +id: overview +title: MQTT +--- + + + + + + +Added in: v4.2.0 + +Harper includes a built-in MQTT broker that provides real-time pub/sub messaging deeply integrated with the database. Unlike a generic MQTT broker, Harper's MQTT implementation connects topics directly to database records — publishing to a topic writes to the database, and subscribing to a topic delivers live updates for the corresponding record. + +## How Topics Map to Database Records + +MQTT topics in Harper follow the same path convention as REST endpoints. If you define a table or resource with an endpoint path of `my-resource`, the corresponding MQTT topic namespace is `my-resource`. + +A topic of `my-resource/some-id` corresponds to the record with id `some-id` in the `my-resource` table (or custom resource). This means: + +- **Subscribing** to `my-resource/some-id` delivers notification messages whenever that record is updated or deleted. +- The **current value** of the record is treated as the retained message for that topic. 
On subscription, the subscriber immediately receives the current record as the initial retained message — no separate GET request needed. +- **Publishing** with the `retain` flag set replaces the record in the database (equivalent to a PUT operation). +- **Publishing without** the `retain` flag delivers the message to current subscribers without writing to the database. + +Defining a table that creates a topic can be as simple as adding a table with no attributes to your [schema.graphql](../database/schema.md) in a Harper application: + +```graphql +type MyTopic @table @export +``` + +## Protocol Support + +Harper supports MQTT versions **v3.1.1** and **v5**, with standard publish/subscribe capabilities. + +### Topics and Wildcards + +Harper supports multi-level topics for both publishing and subscribing: + +- **Multi-level wildcard (`#`)** — Subscribe to `my-resource/#` to receive notifications for all records in that resource, including nested paths (`my-resource/some-id`, `my-resource/nested/id`). +- **Single-level wildcard (`+`)** — Added in v4.3.0. Subscribe to `my-resource/+/status` to match any single path segment. + +### QoS Levels + +- **QoS 0** — At most once delivery (fire and forget). +- **QoS 1** — At least once delivery (acknowledged delivery). +- **QoS 2** — Harper can perform the QoS 2 conversation but does not guarantee exactly-once delivery. + +### Sessions + +- **Clean sessions** — Subscriptions and queued messages are discarded on disconnect. +- **Durable sessions** — Subscriptions and queued messages are persisted across reconnects. + +### Last Will + +Added in: v4.3.0 + +Harper supports the MQTT Last Will and Testament feature. If a client disconnects unexpectedly, the broker publishes the configured will message on its behalf. + +## Content Negotiation + +Harper handles structured data natively. 
Messages can be published and received in any supported structured format — JSON, CBOR, or MessagePack — and Harper stores and delivers them as structured objects. Different clients can independently choose their preferred format: one client may publish in JSON while another subscribes and receives in CBOR. + +## Ordering and Distributed Delivery + +Harper is designed for distributed, low-latency message delivery. Messages are delivered to subscribers immediately on arrival — Harper does not delay delivery to coordinate consensus across nodes. + +In a distributed cluster, messages may arrive out of order due to network topology. The behavior depends on whether the message is retained or non-retained: + +- **Retained messages** (published with `retain: true`, or written via PUT/upsert) maintain eventual consistency across the cluster. Harper keeps the message with the latest timestamp as the winning record state. An out-of-order earlier message will not be re-delivered to clients; the cluster converges to the most recent state. +- **Non-retained messages** are always delivered to local subscribers when received, even if they arrive out of order. Every message is delivered, prioritizing completeness over strict ordering. + +**Non-retained messages** are suited for applications like chat where every message must be delivered. **Retained messages** are suited for sensor readings or state updates where only the latest value matters. + +## Authentication + +MQTT connections support two authentication methods: + +- **Credential-based** — Standard MQTT username/password in the CONNECT packet. +- **mTLS** — Added in v4.3.0. Mutual TLS authentication using client certificates. The `CN` (common name) from the client certificate subject is used as the Harper username by default. + +Authentication is required by default (`requireAuthentication: true`). See [MQTT Configuration](./configuration) for details on disabling authentication or configuring mTLS options. 
+ +## Server Events API + +JavaScript components can listen for MQTT connection events via `server.mqtt.events`: + +```javascript +server.mqtt.events.on('connected', (session, socket) => { + console.log('client connected with id', session.clientId); +}); +``` + +Available events: + +| Event | Description | +| -------------- | ---------------------------------------------------- | +| `connection` | Client establishes a TCP or WebSocket connection | +| `connected` | Client completes MQTT handshake and is authenticated | +| `auth-failed` | Client fails to authenticate | +| `disconnected` | Client disconnects | + +## Feature Support Matrix + +| Feature | Support | +| --------------------------------------------- | ------------------------------------------------------------ | +| MQTT v3.1.1 connections | ✅ | +| MQTT v5 connections | ✅ | +| Secure MQTTS (TLS) | ✅ | +| MQTT over WebSockets | ✅ | +| Authentication via username/password | ✅ | +| Authentication via mTLS | ✅ (added v4.3.0) | +| Publish | ✅ | +| Subscribe | ✅ | +| Multi-level wildcard (`#`) | ✅ | +| Single-level wildcard (`+`) | ✅ (added v4.3.0) | +| QoS 0 | ✅ | +| QoS 1 | ✅ | +| QoS 2 | Not fully supported — conversation supported, not guaranteed | +| Keep-Alive monitoring | ✅ | +| Clean session | ✅ | +| Durable session | ✅ | +| Distributed durable session | Not supported | +| Last Will | ✅ | +| MQTT V5 Subscribe retain handling | ✅ (added v4.3.0) | +| MQTT V5 User properties | Not supported | +| MQTT V5 Will properties | Not supported | +| MQTT V5 Connection properties | Not supported | +| MQTT V5 Connection acknowledgement properties | Not supported | +| MQTT V5 Publish properties | Not supported | +| MQTT V5 Subscribe properties (general) | Not supported | +| MQTT V5 Ack properties | Not supported | +| MQTT V5 AUTH command | Not supported | +| MQTT V5 Shared subscriptions | Not supported | + +## Related + +- [MQTT Configuration](./configuration) +- [HTTP Overview](../http/overview.md) +- [Security 
Overview](../security/overview.md) +- [Database Schema](../database/schema.md) +- [REST Overview](../rest/overview.md) diff --git a/reference/operations-api/operations.md b/reference/operations-api/operations.md new file mode 100644 index 00000000..690aeecd --- /dev/null +++ b/reference/operations-api/operations.md @@ -0,0 +1,990 @@ +--- +title: Operations Reference +--- + + + + + + + + + + + + + + + + + + +# Operations Reference + +This page lists all available Operations API operations, grouped by category. Each entry links to the feature section where the full documentation lives. + +For endpoint and authentication setup, see the [Operations API Overview](./overview.md). + +--- + +## Databases & Tables + +Operations for managing databases, tables, and attributes. + +Detailed documentation: [Database Overview](../database/overview.md) + +| Operation | Description | Role Required | +| ------------------- | ------------------------------------------------------------------- | ------------- | +| `describe_all` | Returns definitions of all databases and tables, with record counts | any | +| `describe_database` | Returns all table definitions for a specified database | any | +| `describe_table` | Returns the definition of a specified table | any | +| `create_database` | Creates a new database | super_user | +| `drop_database` | Drops a database and all its tables/records | super_user | +| `create_table` | Creates a new table with optional schema and expiration | super_user | +| `drop_table` | Drops a table and all its records | super_user | +| `create_attribute` | Adds a new attribute to a table | super_user | +| `drop_attribute` | Removes an attribute and all its values from a table | super_user | +| `get_backup` | Returns a binary snapshot of a database for backup purposes | super_user | + +### `describe_all` + +Returns the definitions of all databases and tables within the database. 
Record counts above 5000 records are estimated; the response includes `estimated_record_range` when estimated. To force an exact count (requires full table scan), include `"exact_count": true`. + +```json +{ "operation": "describe_all" } +``` + +### `describe_database` + +Returns all table definitions within the specified database. + +```json +{ "operation": "describe_database", "database": "dev" } +``` + +### `describe_table` + +Returns the definition of a specific table. + +```json +{ "operation": "describe_table", "table": "dog", "database": "dev" } +``` + +### `create_database` + +Creates a new database. + +```json +{ "operation": "create_database", "database": "dev" } +``` + +### `drop_database` + +Drops a database and all its tables/records. Supports `"replicated": true` to propagate to all cluster nodes. + +```json +{ "operation": "drop_database", "database": "dev" } +``` + +### `create_table` + +Creates a new table. Optional fields: `database` (defaults to `data`), `attributes` (array defining schema), `expiration` (TTL in seconds). + +```json +{ + "operation": "create_table", + "database": "dev", + "table": "dog", + "primary_key": "id" +} +``` + +### `drop_table` + +Drops a table and all associated records. Supports `"replicated": true`. + +```json +{ "operation": "drop_table", "database": "dev", "table": "dog" } +``` + +### `create_attribute` + +Creates a new attribute within a table. Harper auto-creates attributes on insert/update, but this can be used to pre-define them (e.g., for role-based permission setup). + +```json +{ + "operation": "create_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### `drop_attribute` + +Drops an attribute and all its values from the specified table. + +```json +{ + "operation": "drop_attribute", + "database": "dev", + "table": "dog", + "attribute": "is_adorable" +} +``` + +### `get_backup` + +Returns a binary snapshot of the specified database (or individual table). 
Safe for backup while Harper is running. Specify `"table"` for a single table or `"tables"` for a set. + +```json +{ "operation": "get_backup", "database": "dev" } +``` + +--- + +## NoSQL Operations + +Operations for inserting, updating, deleting, and querying records using NoSQL. + +Detailed documentation: [REST Querying Reference](../rest/querying.md) + +| Operation | Description | Role Required | +| ---------------------- | ------------------------------------------------------------------------- | ------------- | +| `insert` | Inserts one or more records | any | +| `update` | Updates one or more records by primary key | any | +| `upsert` | Inserts or updates records | any | +| `delete` | Deletes records by primary key | any | +| `search_by_id` | Retrieves records by primary key | any | +| `search_by_value` | Retrieves records matching a value on any attribute | any | +| `search_by_conditions` | Retrieves records matching complex conditions with sorting and pagination | any | + +### `insert` + +Inserts one or more records. If a primary key is not provided, a GUID or auto-increment value is generated. + +```json +{ + "operation": "insert", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "dog_name": "Penny" }] +} +``` + +### `update` + +Updates one or more records. Primary key must be supplied for each record. + +```json +{ + "operation": "update", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "weight_lbs": 38 }] +} +``` + +### `upsert` + +Updates existing records and inserts new ones. Matches on primary key if provided. + +```json +{ + "operation": "upsert", + "database": "dev", + "table": "dog", + "records": [{ "id": 1, "weight_lbs": 40 }] +} +``` + +### `delete` + +Deletes records by primary key values. + +```json +{ + "operation": "delete", + "database": "dev", + "table": "dog", + "ids": [1, 2] +} +``` + +### `search_by_id` + +Returns records matching the given primary key values. 
Use `"get_attributes": ["*"]` to return all attributes. + +```json +{ + "operation": "search_by_id", + "database": "dev", + "table": "dog", + "ids": [1, 2], + "get_attributes": ["dog_name", "breed_id"] +} +``` + +### `search_by_value` + +Returns records with a matching value on any attribute. Supports wildcards (e.g., `"Ky*"`). + +```json +{ + "operation": "search_by_value", + "database": "dev", + "table": "dog", + "attribute": "owner_name", + "value": "Ky*", + "get_attributes": ["id", "dog_name"] +} +``` + +### `search_by_conditions` + +Returns records matching one or more conditions. Supports `operator` (`and`/`or`), `offset`, `limit`, nested `conditions` groups, and `sort` with multi-level tie-breaking. + +```json +{ + "operation": "search_by_conditions", + "database": "dev", + "table": "dog", + "operator": "and", + "limit": 10, + "get_attributes": ["*"], + "conditions": [{ "attribute": "age", "comparator": "between", "value": [5, 8] }] +} +``` + +--- + +## Bulk Operations + +Operations for bulk import/export of data. + +Detailed documentation: [Database Jobs](../database/jobs.md) + +| Operation | Description | Role Required | +| ----------------------- | -------------------------------------------------------------- | ------------- | +| `export_local` | Exports query results to a local file in JSON or CSV | super_user | +| `csv_data_load` | Ingests CSV data provided inline | any | +| `csv_file_load` | Ingests CSV data from a server-local file path | any | +| `csv_url_load` | Ingests CSV data from a URL | any | +| `export_to_s3` | Exports query results to AWS S3 | super_user | +| `import_from_s3` | Imports CSV or JSON data from AWS S3 | any | +| `delete_records_before` | Deletes records older than a given timestamp (local node only) | super_user | + +All bulk import/export operations are asynchronous and return a job ID. Use [`get_job`](#get_job) to check status. + +### `export_local` + +Exports query results to a local path on the server. 
Formats: `json` or `csv`. + +```json +{ + "operation": "export_local", + "format": "json", + "path": "/data/", + "search_operation": { "operation": "sql", "sql": "SELECT * FROM dev.dog" } +} +``` + +### `csv_data_load` + +Ingests inline CSV data. Actions: `insert` (default), `update`, `upsert`. + +```json +{ + "operation": "csv_data_load", + "database": "dev", + "table": "dog", + "action": "insert", + "data": "id,name\n1,Penny\n" +} +``` + +### `csv_file_load` + +Ingests CSV from a file path on the server running Harper. + +```json +{ + "operation": "csv_file_load", + "database": "dev", + "table": "dog", + "file_path": "/home/user/imports/dogs.csv" +} +``` + +### `csv_url_load` + +Ingests CSV from a URL. + +```json +{ + "operation": "csv_url_load", + "database": "dev", + "table": "dog", + "csv_url": "https://example.com/dogs.csv" +} +``` + +### `export_to_s3` + +Exports query results to an AWS S3 bucket as JSON or CSV. + +```json +{ + "operation": "export_to_s3", + "format": "json", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET", + "bucket": "my-bucket", + "key": "dogs.json", + "region": "us-east-1" + }, + "search_operation": { "operation": "sql", "sql": "SELECT * FROM dev.dog" } +} +``` + +### `import_from_s3` + +Imports CSV or JSON from an AWS S3 bucket. File must include a valid `.csv` or `.json` extension. + +```json +{ + "operation": "import_from_s3", + "database": "dev", + "table": "dog", + "s3": { + "aws_access_key_id": "YOUR_KEY", + "aws_secret_access_key": "YOUR_SECRET", + "bucket": "my-bucket", + "key": "dogs.csv", + "region": "us-east-1" + } +} +``` + +### `delete_records_before` + +Deletes records older than the specified timestamp from the local node only. Clustered nodes retain their data. + +```json +{ + "operation": "delete_records_before", + "date": "2021-01-25T23:05:27.464", + "schema": "dev", + "table": "dog" +} +``` + +--- + +## SQL Operations + +Operations for executing SQL statements. 
+ +:::warning +Harper SQL is intended for data investigation and use cases where performance is not a priority. For production workloads, use NoSQL or REST operations. SQL performance optimizations are on the roadmap. +::: + +Detailed documentation: [SQL Reference](../database/sql.md) + +| Operation | Description | Role Required | +| --------- | ------------------------------------------------------------------ | ------------- | +| `sql` | Executes a SQL `SELECT`, `INSERT`, `UPDATE`, or `DELETE` statement | any | + +### `sql` + +Executes a standard SQL statement. + +```json +{ "operation": "sql", "sql": "SELECT * FROM dev.dog WHERE id = 1" } +``` + +--- + +## Users & Roles + +Operations for managing users and role-based access control (RBAC). + +Detailed documentation: [Users & Roles Operations](../users-and-roles/operations.md) + +| Operation | Description | Role Required | +| ------------ | --------------------------------------------------- | ------------- | +| `list_roles` | Returns all roles | super_user | +| `add_role` | Creates a new role with permissions | super_user | +| `alter_role` | Modifies an existing role's permissions | super_user | +| `drop_role` | Deletes a role (role must have no associated users) | super_user | +| `list_users` | Returns all users | super_user | +| `user_info` | Returns data for the authenticated user | any | +| `add_user` | Creates a new user | super_user | +| `alter_user` | Modifies an existing user's credentials or role | super_user | +| `drop_user` | Deletes a user | super_user | + +### `list_roles` + +Returns all roles defined in the instance. + +```json +{ "operation": "list_roles" } +``` + +### `add_role` + +Creates a new role with the specified permissions. The `permission` object maps database names to table-level access rules (`read`, `insert`, `update`, `delete`). Set `super_user: true` to grant full access. 
+ +```json +{ + "operation": "add_role", + "role": "developer", + "permission": { + "super_user": false, + "dev": { + "tables": { + "dog": { "read": true, "insert": true, "update": true, "delete": false } + } + } + } +} +``` + +### `alter_role` + +Modifies an existing role's name or permissions. Requires the role's `id` (returned by `list_roles`). + +```json +{ + "operation": "alter_role", + "id": "f92162e2-cd17-450c-aae0-372a76859038", + "role": "senior_developer", + "permission": { + "super_user": false, + "dev": { + "tables": { + "dog": { "read": true, "insert": true, "update": true, "delete": true } + } + } + } +} +``` + +### `drop_role` + +Deletes a role. The role must have no associated users before it can be dropped. + +```json +{ "operation": "drop_role", "id": "f92162e2-cd17-450c-aae0-372a76859038" } +``` + +### `list_users` + +Returns all users. + +```json +{ "operation": "list_users" } +``` + +### `user_info` + +Returns data for the currently authenticated user. + +```json +{ "operation": "user_info" } +``` + +### `add_user` + +Creates a new user. `username` cannot be changed after creation. `password` is stored encrypted. + +```json +{ + "operation": "add_user", + "role": "developer", + "username": "hdb_user", + "password": "password", + "active": true +} +``` + +### `alter_user` + +Modifies an existing user's password, role, or active status. All fields except `username` are optional. + +```json +{ + "operation": "alter_user", + "username": "hdb_user", + "password": "new_password", + "role": "senior_developer", + "active": true +} +``` + +### `drop_user` + +Deletes a user by username. + +```json +{ "operation": "drop_user", "username": "hdb_user" } +``` + +See [Users & Roles Operations](../users-and-roles/operations.md) for full documentation including permission object structure. + +--- + +## Token Authentication + +Operations for JWT token creation and refresh. 
+ +Detailed documentation: [JWT Authentication](../security/jwt-authentication.md) + +| Operation | Description | Role Required | +| ------------------------------ | ------------------------------------------------------- | ---------------------- | +| `create_authentication_tokens` | Creates an operation token and refresh token for a user | none (unauthenticated) | +| `refresh_operation_token` | Creates a new operation token from a refresh token | any | + +### `create_authentication_tokens` + +Does not require prior authentication. Returns `operation_token` (short-lived JWT) and `refresh_token` (long-lived JWT). + +```json +{ + "operation": "create_authentication_tokens", + "username": "my-user", + "password": "my-password" +} +``` + +### `refresh_operation_token` + +Creates a new operation token from an existing refresh token. + +```json +{ + "operation": "refresh_operation_token", + "refresh_token": "EXISTING_REFRESH_TOKEN" +} +``` + +--- + +## Components + +Operations for deploying and managing Harper components (applications, extensions, plugins). 
+ +Detailed documentation: [Components Overview](../components/overview.md) + +| Operation | Description | Role Required | +| ---------------------- | ----------------------------------------------------------------------- | ------------- | +| `add_component` | Creates a new component project from a template | super_user | +| `deploy_component` | Deploys a component via payload (tar) or package reference (NPM/GitHub) | super_user | +| `package_component` | Packages a component project into a base64-encoded tar | super_user | +| `drop_component` | Deletes a component or a file within a component | super_user | +| `get_components` | Lists all component files and config | super_user | +| `get_component_file` | Returns the contents of a file within a component | super_user | +| `set_component_file` | Creates or updates a file within a component | super_user | +| `add_ssh_key` | Adds an SSH key for deploying from private repositories | super_user | +| `update_ssh_key` | Updates an existing SSH key | super_user | +| `delete_ssh_key` | Deletes an SSH key | super_user | +| `list_ssh_keys` | Lists all configured SSH key names | super_user | +| `set_ssh_known_hosts` | Overwrites the SSH known_hosts file | super_user | +| `get_ssh_known_hosts` | Returns the contents of the SSH known_hosts file | super_user | +| `install_node_modules` | _(Deprecated)_ Run npm install on component projects | super_user | + +### `deploy_component` + +Deploys a component. The `package` option accepts any valid NPM reference including GitHub repos (`HarperDB/app#semver:v1.0.0`), tarballs, or NPM packages. The `payload` option accepts a base64-encoded tar string from `package_component`. Supports `"replicated": true` and `"restart": true` or `"restart": "rolling"`. 
+ +```json +{ + "operation": "deploy_component", + "project": "my-app", + "package": "my-org/my-app#semver:v1.2.3", + "replicated": true, + "restart": "rolling" +} +``` + +### `add_ssh_key` + +Adds an SSH key (must be ed25519) for authenticating deployments from private repositories. + +```json +{ + "operation": "add_ssh_key", + "name": "my-key", + "key": "-----BEGIN OPENSSH PRIVATE KEY-----\n...\n-----END OPENSSH PRIVATE KEY-----\n", + "host": "my-key.github.com", + "hostname": "github.com" +} +``` + +--- + +## Replication & Clustering + +Operations for configuring and managing Harper cluster replication. + +Detailed documentation: [Replication & Clustering](../replication/clustering.md) + +| Operation | Description | Role Required | +| ----------------------- | --------------------------------------------------------------- | ------------- | +| `add_node` | Adds a Harper instance to the cluster | super_user | +| `update_node` | Modifies an existing node's subscriptions | super_user | +| `remove_node` | Removes a node from the cluster | super_user | +| `cluster_status` | Returns current cluster connection status | super_user | +| `configure_cluster` | Bulk-creates/resets cluster subscriptions across multiple nodes | super_user | +| `cluster_set_routes` | Adds routes to the replication routes config (PATCH/upsert) | super_user | +| `cluster_get_routes` | Returns the current replication routes config | super_user | +| `cluster_delete_routes` | Removes routes from the replication routes config | super_user | + +### `add_node` + +Adds a remote Harper node to the cluster. If `subscriptions` are not provided, a fully replicating cluster is created. Optional fields: `verify_tls`, `authorization`, `retain_authorization`, `revoked_certificates`, `shard`. 
+ +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { "username": "admin", "password": "password" } +} +``` + +### `cluster_status` + +Returns connection state for all cluster nodes, including per-database socket status and replication timing statistics (`lastCommitConfirmed`, `lastReceivedRemoteTime`, `lastReceivedLocalTime`). + +```json +{ "operation": "cluster_status" } +``` + +### `configure_cluster` + +Resets and replaces the entire clustering configuration. Each entry follows the `add_node` schema. + +```json +{ + "operation": "configure_cluster", + "connections": [ + { + "hostname": "server-two", + "subscriptions": [{ "database": "dev", "table": "dog", "subscribe": true, "publish": true }] + } + ] +} +``` + +--- + +## Configuration + +Operations for reading and updating Harper configuration. + +Detailed documentation: [Configuration Overview](../configuration/overview.md) + +| Operation | Description | Role Required | +| ------------------- | ---------------------------------------------------------------- | ------------- | +| `set_configuration` | Modifies Harper configuration file parameters (requires restart) | super_user | +| `get_configuration` | Returns the current Harper configuration | super_user | + +### `set_configuration` + +Updates configuration parameters in `harperdb-config.yaml`. A restart (`restart` or `restart_service`) is required for changes to take effect. + +```json +{ + "operation": "set_configuration", + "logging_level": "trace", + "clustering_enabled": true +} +``` + +### `get_configuration` + +Returns the full current configuration object. + +```json +{ "operation": "get_configuration" } +``` + +--- + +## System + +Operations for restarting Harper and managing system state. 
+ +| Operation | Description | Role Required | +| -------------------- | ----------------------------------------------------- | ------------- | +| `restart` | Restarts the Harper instance | super_user | +| `restart_service` | Restarts a specific Harper service | super_user | +| `system_information` | Returns detailed host system metrics | super_user | +| `set_status` | Sets an application-specific status value (in-memory) | super_user | +| `get_status` | Returns a previously set status value | super_user | +| `clear_status` | Removes a status entry | super_user | + +### `restart` + +Restarts all Harper processes. May take up to 60 seconds. + +```json +{ "operation": "restart" } +``` + +### `restart_service` + +Restarts a specific service. `service` must be one of: `http_workers`, `clustering_config`, `clustering`. Supports `"replicated": true` for a rolling cluster restart. + +```json +{ "operation": "restart_service", "service": "http_workers" } +``` + +### `system_information` + +Returns system metrics including CPU, memory, disk, network, and Harper process info. Optionally filter by `attributes` array (e.g., `["cpu", "memory", "replication"]`). + +```json +{ "operation": "system_information" } +``` + +### `set_status` / `get_status` / `clear_status` + +Manage in-memory application status values. Status types: `primary`, `maintenance`, `availability` (availability only accepts `'Available'` or `'Unavailable'`). Status is not persisted across restarts. + +```json +{ "operation": "set_status", "id": "primary", "status": "active" } +``` + +--- + +## Jobs + +Operations for querying background job status. 
+ +Detailed documentation: [Database Jobs](../database/jobs.md) + +| Operation | Description | Role Required | +| --------------------------- | ------------------------------------------------ | ------------- | +| `get_job` | Returns status and results for a specific job ID | any | +| `search_jobs_by_start_date` | Returns jobs within a specified time window | super_user | + +### `get_job` + +Returns job status (`COMPLETE`, `IN_PROGRESS`, `ERROR`), timing, and result message for the specified job ID. Bulk import/export operations return a job ID on initiation. + +```json +{ "operation": "get_job", "id": "4a982782-929a-4507-8794-26dae1132def" } +``` + +### `search_jobs_by_start_date` + +Returns all jobs started within the specified datetime range. + +```json +{ + "operation": "search_jobs_by_start_date", + "from_date": "2021-01-25T22:05:27.464+0000", + "to_date": "2021-01-25T23:05:27.464+0000" +} +``` + +--- + +## Logs + +Operations for reading Harper logs. + +Detailed documentation: [Logging Operations](../logging/operations.md) + +| Operation | Description | Role Required | +| -------------------------------- | ---------------------------------------------------------------------- | ------------- | +| `read_log` | Returns entries from the primary `hdb.log` | super_user | +| `read_transaction_log` | Returns transaction history for a table | super_user | +| `delete_transaction_logs_before` | Deletes transaction log entries older than a timestamp | super_user | +| `read_audit_log` | Returns verbose audit history for a table (requires audit log enabled) | super_user | +| `delete_audit_logs_before` | Deletes audit log entries older than a timestamp | super_user | + +### `read_log` + +Returns entries from `hdb.log`. Filter by `level` (`notify`, `error`, `warn`, `info`, `debug`, `trace`), date range (`from`, `until`), and text `filter`. 
+ +```json +{ + "operation": "read_log", + "start": 0, + "limit": 100, + "level": "error" +} +``` + +### `read_transaction_log` + +Returns transaction history for a specific table. Optionally filter by `from`/`to` (millisecond epoch) and `limit`. + +```json +{ + "operation": "read_transaction_log", + "schema": "dev", + "table": "dog", + "limit": 10 +} +``` + +### `read_audit_log` + +Returns verbose audit history including original record state. Requires `logging.auditLog: true` in configuration. Filter by `search_type`: `hash_value`, `timestamp`, or `username`. + +```json +{ + "operation": "read_audit_log", + "schema": "dev", + "table": "dog", + "search_type": "username", + "search_values": ["admin"] +} +``` + +--- + +## Certificate Management + +Operations for managing TLS certificates in the `hdb_certificate` system table. + +Detailed documentation: [Certificate Management](../security/certificate-management.md) + +| Operation | Description | Role Required | +| -------------------- | ---------------------------------------------- | ------------- | +| `add_certificate` | Adds or updates a certificate | super_user | +| `remove_certificate` | Removes a certificate and its private key file | super_user | +| `list_certificates` | Lists all certificates | super_user | + +### `add_certificate` + +Adds a certificate to `hdb_certificate`. If a `private_key` is provided, it is written to `/keys/` (not stored in the table). If no private key is provided, the operation searches for a matching one on disk. + +```json +{ + "operation": "add_certificate", + "name": "my-cert", + "certificate": "-----BEGIN CERTIFICATE-----...", + "is_authority": false, + "private_key": "-----BEGIN RSA PRIVATE KEY-----..." +} +``` + +--- + +## Analytics + +Operations for querying analytics metrics. 
+ +Detailed documentation: [Analytics Operations](../analytics/operations.md) + +| Operation | Description | Role Required | +| ----------------- | ----------------------------------------------- | ------------- | +| `get_analytics` | Retrieves analytics data for a specified metric | any | +| `list_metrics` | Lists available analytics metrics | any | +| `describe_metric` | Returns the schema of a specific metric | any | + +### `get_analytics` + +Retrieves analytics data. Supports `start_time`/`end_time` (Unix ms), `get_attributes`, and `conditions` (same format as `search_by_conditions`). + +```json +{ + "operation": "get_analytics", + "metric": "resource-usage", + "start_time": 1769198332754, + "end_time": 1769198532754 +} +``` + +### `list_metrics` + +Returns available metric names. Filter by `metric_types`: `custom`, `builtin` (default: `builtin`). + +```json +{ "operation": "list_metrics" } +``` + +--- + +## Registration & Licensing + +Operations for license management. + +| Operation | Description | Role Required | +| ----------------------- | -------------------------------------------------- | ------------- | +| `registration_info` | Returns registration and version information | any | +| `install_usage_license` | Installs a Harper usage license block | super_user | +| `get_usage_licenses` | Returns all usage licenses with consumption counts | super_user | +| `get_fingerprint` | _(Deprecated)_ Returns the machine fingerprint | super_user | +| `set_license` | _(Deprecated)_ Sets a license key | super_user | + +### `registration_info` + +Returns the instance registration status, version, RAM allocation, and license expiration. + +```json +{ "operation": "registration_info" } +``` + +### `install_usage_license` + +Installs a usage license block. A license is a JWT-like structure (`header.payload.signature`) signed by Harper. Multiple blocks may be installed; earliest blocks are consumed first. 
+ +```json +{ + "operation": "install_usage_license", + "license": "abc...0123.abc...0123.abc...0123" +} +``` + +### `get_usage_licenses` + +Returns all usage licenses (including expired/exhausted) with current consumption counts. Optionally filter by `region`. + +```json +{ "operation": "get_usage_licenses" } +``` + +--- + +## Deprecated Operations + +The following operations are deprecated and should not be used in new code. + +### Custom Functions (Deprecated) + +Custom Functions were the precursor to the Component architecture introduced in v4.2.0. These operations are preserved for backward compatibility. + +Deprecated in: v4.2.0 (moved to legacy in v4.7+) + +For modern equivalents, see [Components Overview](../components/overview.md). + +| Operation | Description | +| --------------------------------- | ------------------------------------------------ | +| `custom_functions_status` | Returns Custom Functions server status | +| `get_custom_functions` | Lists all Custom Function projects | +| `get_custom_function` | Returns a Custom Function file's content | +| `set_custom_function` | Creates or updates a Custom Function file | +| `drop_custom_function` | Deletes a Custom Function file | +| `add_custom_function_project` | Creates a new Custom Function project | +| `drop_custom_function_project` | Deletes a Custom Function project | +| `package_custom_function_project` | Packages a Custom Function project as base64 tar | +| `deploy_custom_function_project` | Deploys a packaged Custom Function project | + +### Other Deprecated Operations + +| Operation | Replaced By | +| ---------------------- | ------------------------------------------------------------------- | +| `install_node_modules` | Handled automatically by `deploy_component` and `restart` | +| `get_fingerprint` | Use `registration_info` | +| `set_license` | Use `install_usage_license` | +| `search_by_hash` | Use `search_by_id` | +| `search_attribute` | Use `attribute` field in `search_by_value` / 
`search_by_conditions` | +| `search_value` | Use `value` field in `search_by_value` / `search_by_conditions` | +| `search_type` | Use `comparator` field in `search_by_conditions` | diff --git a/reference/operations-api/overview.md b/reference/operations-api/overview.md new file mode 100644 index 00000000..711d6ebb --- /dev/null +++ b/reference/operations-api/overview.md @@ -0,0 +1,87 @@ +--- +title: Operations API Overview +--- + + + +# Operations API + +The Operations API provides a comprehensive set of capabilities for configuring, deploying, administering, and controlling Harper. It is the primary programmatic interface for all administrative and operational tasks that are not handled through the REST interface. + +## Endpoint + +All Operations API requests are sent as HTTP POST requests to the Operations API endpoint. By default, this listens on port `9925` on the root path: + +``` +POST http://localhost:9925/ +``` + +See [Configuration Overview](../configuration/overview.md) for how to change the port and other network settings (`operationsApi.network.port`, `operationsApi.network.securePort`). + +## Request Format + +Each request body must be a JSON object with an `operation` field that identifies the operation to perform: + +```http +POST https://my-harper-server:9925/ +Authorization: Basic YourBase64EncodedUser:Pass +Content-Type: application/json + +{ + "operation": "create_table", + "table": "my-table" +} +``` + +## Authentication + +Operations API requests must be authenticated. Harper supports two authentication methods: + +- **Basic Auth**: Base64-encoded `username:password` in the `Authorization` header. See [Basic Authentication](../security/basic-authentication.md). +- **JWT**: A Bearer token in the `Authorization` header, obtained via `create_authentication_tokens`. See [JWT Authentication](../security/jwt-authentication.md). 
+ +The `create_authentication_tokens` operation itself does not require prior authentication — it accepts a username and password and returns an operation token and refresh token. + +## Example with curl + +```bash +curl --location --request POST 'https://my-harper-server:9925/' \ + --header 'Authorization: Basic YourBase64EncodedUser:Pass' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "operation": "create_table", + "table": "my-table" + }' +``` + +## Authorization + +Most operations are restricted to `super_user` roles. This is noted in the documentation for each operation. Some operations (such as `user_info`, `get_job`, and `create_authentication_tokens`) are available to all authenticated users. + +## Operations Reference + +Operations are grouped by topic. See [Operations](./operations.md) for the complete reference list. + +**Topic categories:** + +| Category | Description | Detailed Docs | +| ------------------------------------------------------------------- | -------------------------------------------------------------- | --------------------------------------------------------------- | +| [Databases & Tables](./operations.md#databases--tables) | Create and manage databases, tables, and attributes | [Database Overview](../database/overview.md) | +| [NoSQL Operations](./operations.md#nosql-operations) | Insert, update, upsert, delete, and query records | [REST Querying Reference](../rest/querying.md) | +| [Bulk Operations](./operations.md#bulk-operations) | CSV/S3 import and export, batch delete | [Database Jobs](../database/jobs.md) | +| [SQL Operations](./operations.md#sql-operations) | Execute SQL statements (use for investigation, not production) | — | +| [Users & Roles](./operations.md#users--roles) | Manage users and role-based access control | [Users & Roles Operations](../users-and-roles/operations.md) | +| [Token Authentication](./operations.md#token-authentication) | Create and refresh JWT tokens | [JWT 
Authentication](../security/jwt-authentication.md) | +| [Components](./operations.md#components) | Deploy and manage Harper components | [Components Overview](../components/overview.md) | +| [Replication & Clustering](./operations.md#replication--clustering) | Configure cluster topology and replication | [Replication & Clustering](../replication/clustering.md) | +| [Configuration](./operations.md#configuration) | Read and update Harper configuration | — | +| [System](./operations.md#system) | Restart, system information, status management | — | +| [Jobs](./operations.md#jobs) | Query background job status | [Database Jobs](../database/jobs.md) | +| [Logs](./operations.md#logs) | Read standard, transaction, and audit logs | [Logging Operations](../logging/operations.md) | +| [Certificate Management](./operations.md#certificate-management) | Manage TLS certificates | [Certificate Management](../security/certificate-management.md) | +| [Analytics](./operations.md#analytics) | Query analytics metrics | [Analytics Operations](../analytics/operations.md) | +| [Registration & Licensing](./operations.md#registration--licensing) | License management | — | + +## Past Release API Documentation + +For API documentation prior to v4.0, see [olddocs.harperdb.io](https://olddocs.harperdb.io). diff --git a/reference/replication/clustering.md b/reference/replication/clustering.md new file mode 100644 index 00000000..98937eef --- /dev/null +++ b/reference/replication/clustering.md @@ -0,0 +1,318 @@ +--- +title: Clustering +--- + + + + +# Clustering + +Operations API for managing Harper's replication system. For an overview of how replication works, see [Replication Overview](./overview.md). For sharding configuration, see [Sharding](./sharding.md). + +All clustering operations require `super_user` role. + +--- + +### Add Node + +Adds a new Harper instance to the cluster. If `subscriptions` are provided, it creates the specified replication relationships between the nodes. 
Without `subscriptions`, a fully replicating system is created (all data in all databases). + +**Parameters**: + +- `operation` _(required)_ — must be `add_node` +- `hostname` or `url` _(required)_ — the hostname or URL of the node to add +- `verify_tls` _(optional)_ — whether to verify the TLS certificate. Set to `false` temporarily on fresh installs with self-signed certificates. Defaults to `true` +- `authorization` _(optional)_ — credentials for the node being added. Either an object with `username` and `password`, or an HTTP `Authorization` style string +- `retain_authorization` _(optional)_ — if `true`, stores credentials and uses them on every reconnect. Generally not recommended; prefer certificate-based authentication. Defaults to `false` +- `revoked_certificates` _(optional)_ — array of revoked certificate serial numbers that will not be accepted for any connections +- `shard` _(optional)_ — shard number for this node. Only needed when using sharding +- `start_time` _(optional)_ — ISO 8601 UTC datetime. If set, only data after this time is downloaded during initial synchronization instead of the entire database +- `subscriptions` _(optional)_ — explicit table-level replication relationships. This is optional (and discouraged). Each subscription is an object with: + - `database` — database name + - `table` — table name + - `subscribe` — if `true`, transactions on the remote table are replicated locally + - `publish` — if `true`, transactions on the local table are replicated to the remote node + +**Request**: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password" + } +} +``` + +**Response**: + +```json +{ + "message": "Successfully added 'server-two' to cluster" +} +``` + +> **Note**: `set_node` is an alias for `add_node`. + +--- + +### Update Node + +Modifies an existing Harper instance in the cluster. 
Will attempt to add the node if it does not exist. + +**Parameters**: + +- `operation` _(required)_ — must be `update_node` +- `hostname` _(required)_ — hostname of the remote node to update +- `revoked_certificates` _(optional)_ — array of revoked certificate serial numbers +- `shard` _(optional)_ — shard number to assign to this node +- `subscriptions` _(required)_ — array of subscription objects (same structure as `add_node`) + +**Request**: + +```json +{ + "operation": "update_node", + "hostname": "server-two" +} +``` + +**Response**: + +```json +{ + "message": "Successfully updated 'server-two'" +} +``` + +--- + +### Remove Node + +Removes a Harper node from the cluster and stops all replication to and from that node. + +**Parameters**: + +- `operation` _(required)_ — must be `remove_node` +- `hostname` _(required)_ — hostname of the node to remove + +**Request**: + +```json +{ + "operation": "remove_node", + "hostname": "server-two" +} +``` + +**Response**: + +```json +{ + "message": "Successfully removed 'server-two' from cluster" +} +``` + +--- + +### Cluster Status + +Returns an array of status objects from the cluster, including active WebSocket connections and replication timing statistics. 
+ +Added in: v4.4.0; timing statistics added in v4.5.0 + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_status` + +**Request**: + +```json +{ + "operation": "cluster_status" +} +``` + +**Response**: + +```json +{ + "type": "cluster-status", + "connections": [ + { + "replicateByDefault": true, + "replicates": true, + "url": "wss://server-2.domain.com:9933", + "name": "server-2.domain.com", + "subscriptions": null, + "database_sockets": [ + { + "database": "data", + "connected": true, + "latency": 0.7, + "thread_id": 1, + "nodes": ["server-2.domain.com"], + "lastCommitConfirmed": "Wed, 12 Feb 2025 19:09:34 GMT", + "lastReceivedRemoteTime": "Wed, 12 Feb 2025 16:49:29 GMT", + "lastReceivedLocalTime": "Wed, 12 Feb 2025 16:50:59 GMT", + "lastSendTime": "Wed, 12 Feb 2025 16:50:59 GMT" + } + ] + } + ], + "node_name": "server-1.domain.com", + "is_enabled": true +} +``` + +`database_sockets` shows the actual WebSocket connections between nodes — one socket per database per node. Timing fields: + +| Field | Description | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------- | +| `lastCommitConfirmed` | Last time a receipt of confirmation was received for an outgoing commit | +| `lastReceivedRemoteTime` | Timestamp (from the originating node) of the last received transaction | +| `lastReceivedLocalTime` | Local time when the last transaction was received. A gap between this and `lastReceivedRemoteTime` suggests the node is catching up | +| `sendingMessage` | Timestamp of the transaction actively being sent. Absent when waiting for the next transaction | + +--- + +### Configure Cluster + +Bulk creates or resets subscriptions for any number of remote nodes. 
**Resets and replaces any existing clustering setup.** + +**Parameters**: + +- `operation` _(required)_ — must be `configure_cluster` +- `connections` _(required)_ — array of node objects following the `add_node` schema + +**Request**: + +```json +{ + "operation": "configure_cluster", + "connections": [ + { + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password2" + } + }, + { + "hostname": "server-three", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password3" + } + } + ] +} +``` + +**Response**: + +```json +{ + "message": "Cluster successfully configured." +} +``` + +--- + +### Cluster Set Routes + +Adds routes to the `replication.routes` configuration. Behaves as a PATCH/upsert — adds new routes while leaving existing routes untouched. + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_set_routes` +- `routes` _(required)_ — array of route strings (`wss://host:port`) or objects with `hostname` and `port` properties + +**Request**: + +```json +{ + "operation": "cluster_set_routes", + "routes": [ + "wss://server-two:9925", + { + "hostname": "server-three", + "port": 9930 + } + ] +} +``` + +**Response**: + +```json +{ + "message": "cluster routes successfully set", + "set": ["wss://server-two:9925", { "hostname": "server-three", "port": 9930 }], + "skipped": [] +} +``` + +--- + +### Cluster Get Routes + +Returns the replication routes from the Harper config file. + +**Parameters**: + +- `operation` _(required)_ — must be `cluster_get_routes` + +**Request**: + +```json +{ + "operation": "cluster_get_routes" +} +``` + +**Response**: + +```json +["wss://server-two:9925", { "hostname": "server-three", "port": 9930 }] +``` + +--- + +### Cluster Delete Routes + +Removes routes from the Harper config file. 
+ +**Parameters**: + +- `operation` _(required)_ — must be `cluster_delete_routes` +- `routes` _(required)_ — array of route objects to remove + +**Request**: + +```json +{ + "operation": "cluster_delete_routes", + "routes": [ + { + "hostname": "server-three", + "port": 9930 + } + ] +} +``` + +**Response**: + +```json +{ + "message": "cluster routes successfully deleted", + "deleted": [{ "hostname": "server-three", "port": 9930 }], + "skipped": [] +} +``` diff --git a/reference/replication/overview.md b/reference/replication/overview.md new file mode 100644 index 00000000..7847ecc9 --- /dev/null +++ b/reference/replication/overview.md @@ -0,0 +1,309 @@ +--- +title: Replication Overview +--- + + + + +# Replication Overview + +Harper's replication system is designed to make distributed data replication fast and reliable across multiple nodes. You can build a distributed database that ensures high availability, disaster recovery, and data localization — all without complex setup. Nodes can be added or removed dynamically, you can choose which data to replicate, and you can monitor cluster health without jumping through hoops. + +## Peer-to-Peer Model + +Harper replication uses a peer-to-peer model where every node in your cluster can send data to and receive data from other nodes. Nodes communicate over WebSockets, allowing data to flow in both directions. Harper automatically manages these connections and subscriptions, so you don't need to manually track data consistency. Connections between nodes are secured and reliable by default. + +## Configuration + +### Connecting Nodes + +To connect nodes to each other, provide hostnames or URLs in the `replication` section of `harperdb-config.yaml`. 
Each node specifies its own hostname and the routes (other nodes) it should connect to: + +```yaml +replication: + hostname: server-one + routes: + - server-two + - server-three +``` + +Routes can also be specified as URLs or with explicit port numbers: + +```yaml +replication: + hostname: server-one + routes: + - wss://server-two:9933 + - hostname: server-three + port: 9933 +``` + +By default, replication connects on the secure port `9933`. + +```yaml +replication: + securePort: 9933 +``` + +You can also manage nodes dynamically through the [Operations API](./clustering.md) without editing the config file. + +### Gossip Discovery + +Harper automatically replicates node information to other nodes in the cluster using [gossip-style discovery](https://highscalability.com/gossip-protocol-explained/). This means you only need to connect to one existing node in a cluster, and Harper will automatically detect and connect to all other nodes bidirectionally. + +### Data Selection + +By default, Harper replicates all data in all databases. You can narrow replication to specific databases: + +```yaml +replication: + databases: + - data + - system +``` + +All tables within a replicated database are replicated by default. To exclude a specific table from replication, set `replicate: false` in the table definition: + +```graphql +type LocalTableForNode @table(replicate: false) { + id: ID! + name: String! +} +``` + +Transactions are replicated atomically, which may span multiple tables. You can also control how many nodes data is replicated to using [sharding configuration](./sharding.md). + +## Securing Connections + +Harper supports PKI-based security and authorization for replication connections. Two authentication methods are supported: + +- **Certificate-based authentication** (recommended for production): Nodes are identified by the certificate's common name (CN) or Subject Alternative Names (SANs). 
+- **IP-based authentication** (for development/testing): Nodes are identified by IP address when using insecure connections. + +Harper can automatically perform CRL (Certificate Revocation List) and OCSP (Online Certificate Status Protocol) verification to ensure revoked certificates cannot be used. OCSP and CRL work automatically with certificates from public CAs when `enableRootCAs` is enabled. For self-signed certificates or private CAs without OCSP/CRL support, use Harper's manual certificate revocation feature. Certificate verification settings follow the same configuration as HTTP mTLS connections (see [Certificate Verification](../security/certificate-verification.md)). + +### Providing Your Own Certificates + +If you have certificates from a public or corporate CA, enable `enableRootCAs` so nodes validate against the standard root CA list: + +```yaml +replication: + enableRootCAs: true +``` + +Ensure the certificate's CN matches the node's hostname. + +### Setting Up Custom Certificates + +There are two ways to configure Harper with your own certificates: + +1. Use the `add_certificate` operation to upload them. +2. Specify certificate paths directly in `harperdb-config.yaml`: + +```yaml +tls: + certificate: /path/to/certificate.pem + certificateAuthority: /path/to/ca.pem + privateKey: /path/to/privateKey.pem +``` + +Harper will load the provided certificates into the certificate table and use them to secure and authenticate connections. If you have a publicly-signed certificate, you can omit the `certificateAuthority` and enable `enableRootCAs` to use the bundled Mozilla CA store instead. + +### Cross-Generated Certificates + +Harper can generate its own certificates for secure connections — useful when no existing certificates are available. 
When you run `add_node` over SSL with temporary credentials, Harper automatically handles certificate generation and signing: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "verify_tls": false, + "authorization": { + "username": "admin", + "password": "password" + } +} +``` + +On a fresh install, set `verify_tls: false` temporarily to accept the self-signed certificate. Harper then: + +1. Creates a certificate signing request (CSR) and sends it to `server-two`. +2. `server-two` signs the CSR and returns the signed certificate and CA. +3. The signed certificate is stored for all future connections. + +Credentials are not stored — they are discarded immediately after use. You can also provide credentials in HTTP Authorization format (Basic, Token, or JWT). + +### Revoking Certificates + +Added in: v4.5.0 + +Certificates used in replication can be revoked using the certificate serial number. Use either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config: + +Via the operations API: + +```json +{ + "operation": "update_node", + "hostname": "server-two", + "revoked_certificates": ["1769F7D6A"] +} +``` + +Via `harperdb-config.yaml`: + +```yaml +replication: + routes: + - hostname: server-three + port: 9930 + revokedCertificates: + - 1769F7D6A + - QA69C7E2S +``` + +### Insecure IP-Based Authentication + +For development, testing, or secure private networks, you can disable TLS and use IP addresses to authenticate nodes. Configure replication on an insecure port and set up IP-based routes: + +```yaml +replication: + port: 9933 + routes: + - 127.0.0.2 + - 127.0.0.3 +``` + +> **Warning**: Never use insecure connections for production systems accessible from the public internet. + +Loopback addresses (`127.0.0.X`) are a convenient way to run multiple nodes on a single machine for local development. 
+ +## Controlling Replication Flow + +By default, Harper replicates all data in all databases with symmetric bidirectional flow. To restrict replication to one direction between certain nodes, set `sends` and `receives` on the route configuration: + +```yaml +replication: + databases: + - data + routes: + - host: node-two + replicates: + sends: false + receives: true + - host: node-three + replicates: + sends: true + receives: false +``` + +In this example, the local node only receives from `node-two` (one-way inbound) and only sends to `node-three` (one-way outbound). + +> **Note**: When using controlled flow replication, avoid replicating the `system` database. The `system` database contains node configurations, so replicating it would cause all nodes to have identical (and incorrect) route configurations. + +### Explicit Subscriptions + +By default, Harper automatically manages connections and subscriptions between nodes. Explicit subscriptions exist only for testing, debugging, and legacy migration — they should not be used for production replication and will likely be removed in v5. + +With explicit subscriptions, Harper no longer guarantees data consistency. If you want unidirectional replication, use [controlled replication flow](#controlling-replication-flow) instead. 
+ +To explicitly subscribe, use `add_node` with subscription definitions: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "subscriptions": [ + { + "database": "dev", + "table": "my-table", + "publish": true, + "subscribe": false + } + ] +} +``` + +Update a subscription with `update_node`: + +```json +{ + "operation": "update_node", + "hostname": "server-two", + "subscriptions": [ + { + "database": "dev", + "table": "my-table", + "publish": true, + "subscribe": true + } + ] +} +``` + +## Monitoring Replication + +Added in: v4.5.0 (cluster status timing statistics) + +Use `cluster_status` to monitor the state of replication: + +```json +{ + "operation": "cluster_status" +} +``` + +See [Clustering Operations](./clustering.md#cluster-status) for the full response schema and field descriptions. + +## Initial Synchronization and Resynchronization + +When a new node is added and its database has not been previously synced, Harper downloads the full database from the first node it connects to. After the initial sync completes, the node enters replication mode and receives incremental updates. + +If a node goes offline and comes back, it resynchronizes automatically to catch up on missed transactions. + +You can also specify a `start_time` in the `add_node` operation to limit the initial download to data since a given point in time: + +```json +{ + "operation": "add_node", + "hostname": "server-two", + "start_time": "2024-01-01T00:00:00.000Z" +} +``` + +## Replicated Transactions + +The following data operations are replicated across the cluster: + +- Insert +- Update +- Upsert +- Delete +- Bulk loads (CSV data load, CSV file load, CSV URL load, import from S3) + +**Destructive schema operations are not replicated**: `drop_database`, `drop_table`, and `drop_attribute` must be run on each node independently. + +Users and roles are not replicated across the cluster. 
+ +Certain management operations — including component deployment and rolling restarts — can also be replicated across the cluster. + +## Inspecting Cluster Configuration + +Query the `hdb_nodes` system table to inspect the current known nodes and their configuration: + +```json +{ + "operation": "search_by_value", + "database": "system", + "table": "hdb_nodes", + "attribute": "name", + "value": "*" +} +``` + +The `hdb_certificate` table contains the certificates used for replication connections. + +## See Also + +- [Clustering Operations](./clustering.md) — Operations API for managing cluster nodes and subscriptions +- [Sharding](./sharding.md) — Distributing data across a subset of nodes +- [Certificate Management](../security/certificate-management.md) diff --git a/reference/replication/sharding.md b/reference/replication/sharding.md new file mode 100644 index 00000000..6625045a --- /dev/null +++ b/reference/replication/sharding.md @@ -0,0 +1,209 @@ +--- +title: Sharding +--- + + + + + +# Sharding + +Added in: v4.4.0 (provisional) + +Changed in: v4.5.0 — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. + +Harper's replication system supports sharding — storing different data across different subsets of nodes — while still allowing data to be accessed from any node in the cluster. This enables horizontal scalability for storage and write performance, while maintaining optimal data locality and consistency. + +When sharding is configured, requests for records that don't reside on the handling node are automatically forwarded to the appropriate node transparently. Clients do not need to know where data is stored. + +By default (without sharding), Harper replicates all data to all nodes. 
+ +## Approaches to Sharding + +There are two main approaches: + +**Dynamic sharding** — the location (residency) of records is determined dynamically based on where the record was written, the record's data, or a custom function. Records can be relocated dynamically based on where they are accessed. Residency information is specific to each record. + +**Static sharding** — each node is assigned to a specific numbered shard, and each record is replicated to the nodes in that shard based on the primary key, regardless of where the data was written or accessed. More predictable than dynamic sharding: data location is always determinable from the primary key. + +## Dynamic Sharding + +### Replication Count + +The simplest way to limit replication is to configure a replication count. Set `replicateTo` in the `replication` section of `harperdb-config.yaml` to specify how many additional nodes data should be replicated to: + +```yaml +replication: + replicateTo: 2 +``` + +This ensures each record is stored on three nodes total (the node that first stored it, plus two others). + +### Replication Control via REST Header + +With the REST interface, you can specify replication targets and confirmation requirements per request using the `X-Replicate-To` header: + +```http +PUT /MyTable/3 +X-Replicate-To: 2;confirm=1 +``` + +- `2` — replicate to two additional nodes +- `confirm=1` — wait for confirmation from one additional node before responding + +Specify exact destination nodes by hostname: + +```http +PUT /MyTable/3 +X-Replicate-To: node1,node2 +``` + +The `confirm` parameter can be combined with explicit node lists. 
+ +### Replication Control via Operations API + +Specify `replicateTo` and `replicatedConfirmation` in the operation body: + +```json +{ + "operation": "update", + "schema": "dev", + "table": "MyTable", + "hashValues": [3], + "record": { + "name": "John Doe" + }, + "replicateTo": 2, + "replicatedConfirmation": 1 +} +``` + +Or specify explicit nodes: + +```jsonc +{ + // ... + "replicateTo": ["node-1", "node-2"], + // ... +} +``` + +### Programmatic Replication Control + +Set `replicateTo` and `replicatedConfirmation` programmatically in a resource method: + +```javascript +class MyTable extends tables.MyTable { + put(record) { + const context = this.getContext(); + context.replicateTo = 2; // or an array of node names + context.replicatedConfirmation = 1; + return super.put(record); + } +} +``` + +## Static Sharding + +### Basic Static Shard Configuration + +Assign a node to a numbered shard in `harperdb-config.yaml`: + +```yaml +replication: + shard: 1 +``` + +Or assign shards per route: + +```yaml +replication: + routes: + - hostname: node1 + shard: 1 + - hostname: node2 + shard: 2 +``` + +Or dynamically via the operations API by including `shard` in an `add_node` or `set_node` operation: + +```json +{ + "operation": "add_node", + "hostname": "node1", + "shard": 1 +} +``` + +Once shards are configured, use `setResidency` or `setResidencyById` (described below) to assign records to specific shards. + +## Custom Sharding + +### By Record Content (`setResidency`) + +Define a custom residency function that is called with the full record. Return an array of node hostnames or a shard number. + +With this approach, record metadata (including residency information) and indexed properties are replicated to all nodes, but the full record is only stored on the specified nodes. + +Return node hostnames: + +```javascript +MyTable.setResidency((record) => { + return record.id % 2 === 0 ? 
['node1'] : ['node2']; +}); +``` + +Return a shard number (replicates to all nodes in that shard): + +```javascript +MyTable.setResidency((record) => { + return record.id % 2 === 0 ? 1 : 2; +}); +``` + +### By Primary Key Only (`setResidencyById`) + +Define a residency function based solely on the primary key. Records (including metadata) are only replicated to the specified nodes — metadata does not need to be replicated everywhere, which allows data to be retrieved without needing access to record data or metadata on the requesting node. + +Return a shard number: + +```javascript +MyTable.setResidencyById((id) => { + return id % 2 === 0 ? 1 : 2; +}); +``` + +Return node hostnames: + +```javascript +MyTable.setResidencyById((id) => { + return id % 2 === 0 ? ['node1'] : ['node2']; +}); +``` + +## Disabling Cross-Node Access + +By default, sharding allows data stored on specific nodes to be accessed from any node — requests are forwarded transparently. To disable this and only return data if it is stored on the local node, set `replicateFrom` to `false`. + +Via the operations API: + +```json +{ + "operation": "search_by_id", + "table": "MyTable", + "ids": [3], + "replicateFrom": false +} +``` + +Via the REST API: + +```http +GET /MyTable/3 +X-Replicate-From: none +``` + +## See Also + +- [Replication Overview](./overview.md) — How Harper's replication system works +- [Clustering Operations](./clustering.md) — Operations API for managing cluster nodes diff --git a/reference/resources/overview.md b/reference/resources/overview.md new file mode 100644 index 00000000..09f47e5b --- /dev/null +++ b/reference/resources/overview.md @@ -0,0 +1,130 @@ +--- +title: Resources Overview +--- + + + + +# Resources + +Harper's Resource API is the foundation for building custom data access logic and connecting data sources. Resources are JavaScript classes that define how data is accessed, modified, subscribed to, and served over HTTP, MQTT, and WebSocket protocols. 
+ +## What Is a Resource? + +A **Resource** is a class that provides a unified interface for a set of records or entities. Harper's built-in tables extend the base `Resource` class, and you can extend either `Resource` or a table class to implement custom behavior for any data source — internal or external. + +Added in: v4.2.0 + +The Resource API is designed to mirror REST/HTTP semantics: methods map directly to HTTP verbs (`get`, `put`, `patch`, `post`, `delete`), making it straightforward to build API endpoints alongside custom data logic. + +## Relationship to Other Features + +- **Database tables** extend `Resource` automatically. You can use tables through the Resource API without writing any custom code. +- The **REST plugin** maps incoming HTTP requests to Resource methods. See [REST Overview](../rest/overview.md). +- The **MQTT plugin** routes publish/subscribe messages to `publish` and `subscribe` Resource methods. See [MQTT Overview](../mqtt/overview.md). +- **Global APIs** (`tables`, `databases`, `transaction`) provide access to resources from JavaScript code. +- The **`jsResource` plugin** (configured in `config.yaml`) registers a JavaScript file's exported Resource classes as endpoints. + +## Resource API Versions + +The Resource API has two behavioral modes controlled by the `loadAsInstance` static property: + +- **V2 (recommended, `loadAsInstance = false`)**: Instance methods receive a `RequestTarget` as the first argument; no record is preloaded onto `this`. Recommended for all new code. +- **V1 (legacy, `loadAsInstance = true`)**: Instance methods are called with `this` pre-bound to the matching record. Preserved for backwards compatibility. + +The [Resource API reference](./resource-api.md) is written against V2. For V1 behavior and migration guidance, see the legacy instance binding section of that page. + +## Extending a Table + +The most common use case is extending an existing table to add custom logic. 
+ +Starting with a table definition in a `schema.graphql`: + +```graphql +# Omit the `@export` directive +type MyTable @table { + id: Long @primaryKey + # ... +} +``` + +> For more info on the schema API see [`Database / Schema`](../database/schema.md) + +Then, in a `resources.js` extend from the `tables.MyTable` global: + +```javascript +export class MyTable extends tables.MyTable { + static loadAsInstance = false; // use V2 API + + async get(target) { + // add a computed property before returning + + const record = await super.get(target); + + return { ...record, computedField: 'value' }; + } + + post(target, data) { + // custom action on POST + this.create({ ...data, status: 'pending' }); + } +} +``` + +Finally, ensure everything is configured appropriately: + +```yaml +rest: true +graphqlSchema: + files: schema.graphql +jsResource: + files: resources.js +``` + +## Custom External Data Source + +You can also extend the base `Resource` class directly to implement custom endpoints, or even wrap an external API or service as a custom caching layer: + +```javascript +export class CustomEndpoint extends Resource { + static loadAsInstance = false; + + get(target) { + return { + data: doSomething(), + }; + } +} + +export class MyExternalData extends Resource { + static loadAsInstance = false; + + async get(target) { + const response = await fetch(`https://api.example.com/${target.id}`); + return response.json(); + } + + put(target, data) { + return fetch(`https://api.example.com/${target.id}`, { + method: 'PUT', + body: JSON.stringify(data), + }); + } +} + +// Use as a cache source for a local table +tables.MyCache.sourcedFrom(MyExternalData); +``` + +Resources are the true customization point for Harper. This is where the business logic of a Harper application really lives. There is a lot more to this API than these examples show. Ensure you fully review the [Resource API](./resource-api.md) documentation, and consider exploring the Learn guides for more information.
+ +## Exporting Resources as Endpoints + +Resources become HTTP/MQTT endpoints when they are exported. As the examples demonstrated, if a Resource extends an existing table, make sure not to have conflicting exports between the schema and the JavaScript implementation. Alternatively, you can register resources programmatically using `server.resources.set()`. See [HTTP API](../http/api.md) for server extension documentation. + +## Pages in This Section + +| Page | Description | +| --------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| [Resource API](./resource-api.md) | Complete reference for instance methods, static methods, the Query object, RequestTarget, and response handling | +| [Query Optimization](./query-optimization.md) | How Harper executes queries and how to write performant conditions | diff --git a/reference/resources/query-optimization.md b/reference/resources/query-optimization.md new file mode 100644 index 00000000..6501565a --- /dev/null +++ b/reference/resources/query-optimization.md @@ -0,0 +1,219 @@ +--- +title: Query Optimization +--- + + + + +# Query Optimization + +Added in: v4.3.0 (query planning and execution improvements) + +Harper has powerful query functionality with excellent performance characteristics. Like any database, different queries can vary significantly in performance. Understanding how querying works helps you write queries that perform well as your dataset grows. + +## Query Execution + +At a fundamental level, querying involves defining conditions to find matching data and then executing those conditions against the database. Harper supports indexed fields, and these indexes are used to speed up query execution. + +When conditions are specified in a query, Harper attempts to utilize indexes to optimize the speed of query execution.
When a field is not indexed, Harper checks each potential record to determine if it matches the condition — this is a full table scan and degrades as data grows (`O(n)`). + +When a query has multiple conditions, Harper attempts to optimize their execution order. For intersecting conditions (the default `and` operator), Harper applies the most selective and performant condition first. If one condition can use an index and is more selective than another, it is used first to narrow the candidate set before filtering on the remaining conditions. + +The `search` method supports an `explain` flag that returns the query execution order Harper determined, useful for debugging and optimization: + +```javascript +const result = await MyTable.search({ + conditions: [...], + explain: true, +}); +``` + +For union queries (`or` operator), each condition is executed separately and the results are merged. + +## Conditions, Operators, and Indexing + +When a query is executed, conditions are evaluated against the database. Indexed fields significantly improve query performance. + +### Index Performance Characteristics + +| Operator | Uses index | Notes | +| -------------------------------------------------------------------- | ------------------ | ------------------------------------------------------------------------ | +| `equals` | Yes | Fast lookup in sorted index | +| `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal` | Yes | Range scan in sorted index; narrower range = faster | +| `starts_with` | Yes | Prefix search in sorted index | +| `not_equal` | No | Full scan required (unless combined with selective indexed condition) | +| `contains` | No | Full scan required | +| `ends_with` | No | Full scan required | +| `!= null` | Yes (special case) | Can use indexes to find non-null records; only helpful for sparse fields | + +**Rule of thumb**: Use `equals`, range operators, and `starts_with` on indexed fields. 
Avoid `contains`, `ends_with`, and `not_equal` as the sole or first condition in large datasets. + +### Indexed vs. Non-Indexed Fields + +Indexed fields provide `O(log n)` lookup — fast even as the dataset grows. Non-indexed fields require `O(n)` full table scans. + +Trade-off: indexes speed up reads but add overhead to writes (insert/update/delete must update the index). This is usually worth it for frequently queried fields. + +### Primary Key vs. Secondary Index + +Querying on a **primary key** is faster than querying on a secondary (non-primary) index, because the primary key directly addresses the record without cross-referencing. + +Secondary indexes are still valuable for query conditions on other fields, but expect slightly more overhead than primary key lookups. + +### Cardinality + +More unique values (higher cardinality) = more efficient indexed lookups. For example, an index on a boolean field has very low cardinality (only two possible values) and is less efficient than an index on a `UUID` field. High-cardinality fields benefit most from indexing. + +## Relationships and Joins + +Harper supports relationship-based queries that join data across tables. See [Schema documentation](../database/schema.md) for how to define relationships. + +Join queries involve more lookups and naturally carry more overhead. The same indexing principles apply: + +- Conditions on joined table fields should use indexed columns for best performance. +- If a relationship uses a foreign key, that foreign key should be indexed in both tables. +- Higher cardinality foreign keys make joins more efficient. 
+ +Example of an indexed foreign key that enables efficient join queries: + +```graphql +type Product @table { + id: Long @primaryKey + brandId: Long @indexed # foreign key — index this + brand: Brand @relation(from: "brandId") +} +type Brand @table { + id: Long @primaryKey + name: String @indexed # indexed — enables efficient brand.name queries + products: [Product] @relation(to: "brandId") +} +``` + +Added in: v4.3.0 + +## Sorting + +Sorting can significantly impact query performance. + +- **Aligned sort and index**: If the sort attribute is the same indexed field used in the primary condition, Harper can use the index to retrieve results already in order — very fast. +- **Unaligned sort**: If the sort is on a different field than the condition, or the sort field is not indexed, Harper must retrieve and sort all matching records. For large result sets this can be slow, and it also **defeats streaming** (see below). + +Best practice: sort on the same indexed field you are filtering on, or sort on a secondary indexed field with a narrow enough condition to produce a manageable result set. + +## Streaming + +Harper can stream query results — returning records as they are found rather than waiting for the entire query to complete. This improves time-to-first-byte for large queries and reduces peak memory usage. + +**Streaming is defeated** when: + +- A sort order is specified that is not aligned with the condition's index +- The full result set must be materialized to perform sorting + +When streaming is possible, results are returned as an `AsyncIterable`: + +```javascript +for await (const record of MyTable.search({ conditions: [...] })) { + // process each record as it arrives +} +``` + +Failing to iterate the `AsyncIterable` to completion keeps a read transaction open, degrading performance. Always ensure you either fully iterate or explicitly release the query. + +### Draining or Releasing a Query + +An open query holds an active read transaction.
While that transaction is open, the underlying data pages and internal state for the query cannot be freed — they remain pinned in memory until the transaction closes. In long-running processes or under high concurrency, accumulating unreleased transactions degrades throughput and increases memory pressure. + +The transaction closes automatically once the `AsyncIterable` is fully iterated. If you need to stop early, you must explicitly signal that iteration is complete so Harper can release the transaction. + +**Breaking out of a `for await...of` loop** is the most natural way. The JavaScript runtime automatically calls `.return()` on the iterator when a `break`, `return`, or `throw` exits the loop: + +```javascript +for await (const record of MyTable.search({ conditions: [...] })) { + if (meetsStopCriteria(record)) { + break; // iterator.return() is called automatically — transaction is released + } + process(record); +} +``` + +**Calling `.return()` manually** is useful when you hold an iterator reference directly: + +```javascript +const iterator = MyTable.search({ conditions: [...] })[Symbol.asyncIterator](); +try { + const { value } = await iterator.next(); + process(value); +} finally { + await iterator.return(); // explicitly closes the iterator and releases the transaction +} +``` + +Avoid storing an iterator and abandoning it (e.g. never calling `.next()` again without calling `.return()`), as the transaction will remain open until the iterator is garbage collected — which is non-deterministic. 
+ +## Practical Guidance + +### Index fields you query on frequently + +```graphql +type Product @table { + id: Long @primaryKey + name: String @indexed # queried frequently + category: String @indexed # queried frequently + description: String # not indexed (rarely in conditions) +} +``` + +### Use `explain` to diagnose slow queries + +```javascript +const result = await Product.search({ + conditions: [ + { attribute: 'category', value: 'electronics' }, + { attribute: 'price', comparator: 'less_than', value: 100 }, + ], + explain: true, +}); +// result shows the actual execution order Harper selected +``` + +### Prefer selective conditions first + +When Harper cannot auto-reorder (e.g. with `enforceExecutionOrder`), put the most selective condition first: + +```javascript +// Better: indexed, selective condition first +Product.search({ + conditions: [ + { attribute: 'sku', value: 'ABC-001' }, // exact match on indexed unique field + { attribute: 'active', value: true }, // low cardinality filter + ], +}); +``` + +### Use `limit` and `offset` for pagination + +```javascript +Product.search({ + conditions: [...], + sort: { attribute: 'createdAt', descending: true }, + limit: 20, + offset: page * 20, +}); +``` + +### Avoid wide range queries on non-indexed fields + +```javascript +// Slow: non-indexed field with range condition +Product.search({ + conditions: [{ attribute: 'description', comparator: 'contains', value: 'sale' }], +}); + +// Better: use an indexed field condition to narrow first +Product.search({ + conditions: [ + { attribute: 'category', value: 'clothing' }, // indexed — narrows to subset + { attribute: 'description', comparator: 'contains', value: 'sale' }, // non-indexed, applied to smaller set + ], +}); +``` diff --git a/reference/resources/resource-api.md b/reference/resources/resource-api.md new file mode 100644 index 00000000..8c047d48 --- /dev/null +++ b/reference/resources/resource-api.md @@ -0,0 +1,635 @@ +--- +title: Resource API +--- + + + 
+ + + + + + + +# Resource API + +Added in: v4.2.0 + +The Resource API provides a unified JavaScript interface for accessing, querying, modifying, and subscribing to data resources in Harper. Tables extend the base `Resource` class, and all resource interactions — whether from HTTP requests, MQTT messages, or application code — flow through this interface. + +## API Versions + +The Resource API has two behavioral modes selected by the `loadAsInstance` static property: + +| Version | `loadAsInstance` | Status | +| ------------ | ---------------- | ------------------------------------- | +| V2 (current) | `false` | Recommended for new code | +| V1 (legacy) | `true` (default) | Preserved for backwards compatibility | + +The default value of `loadAsInstance` is `true` (V1 behavior). To opt in to V2, you must explicitly set `static loadAsInstance = false` on your resource class. + +This page documents V2 behavior (`loadAsInstance = false`). For V1 (legacy instance binding) behavior and migration examples, see [Legacy Instance Binding](#legacy-instance-binding-v1). + +### V2 Behavioral Differences from V1 + +Changed in: v4.6.0 (Resource API upgrades that formalized V2) + +When `loadAsInstance = false`: + +- Instance methods receive a `RequestTarget` as their first argument; no record is preloaded onto `this`. +- The `get` method returns the record as a plain (frozen) object rather than a Resource instance. +- `put`, `post`, and `patch` receive `(target, data)` — **arguments are reversed from V1**. +- Authorization is handled via `target.checkPermission` rather than `allowRead`/`allowUpdate`/etc. methods. Set it to `false` to bypass permission checks entirely (e.g. 
for a public read endpoint), or leave it at its default to require superuser access for write operations: + + ```javascript + // Public read — no auth required + get(target) { + target.checkPermission = false; + return super.get(target); + } + + // POST is superuser-only by default — no change needed + post(target, data) { + return super.post(target, data); + } + ``` + + `checkPermission` can also be set to a non-boolean value to delegate to role-based or schema-defined permissions — see the authorization documentation for details. + +- The `update` method returns an `Updatable` object instead of a Resource instance. +- Context is tracked automatically via async context tracking; set `static explicitContext = true` to disable (improves performance). +- `getId()` is not used and returns `undefined`. + +--- + +## Resource Instance Methods + +These methods are defined on a Resource class and called when requests are routed to the resource. Override them to define custom behavior. + +### `get(target: RequestTarget): Promise | AsyncIterable` + +Called for HTTP GET requests. When the request targets a single record (e.g. `/Table/some-id`), returns a single record object. When the request targets a collection (e.g. `/Table/?name=value`), the `target.isCollection` property is `true` and the default behavior calls `search()`, returning an `AsyncIterable`. + +```javascript +class MyResource extends Resource { + static loadAsInstance = false; + + get(target) { + const id = target.id; // primary key from URL path + const param = target.get('param1'); // query string param + const path = target.pathname; // path relative to resource + return super.get(target); // default: return the record + } +} +``` + +The default `super.get(target)` returns a `RecordObject` — a frozen plain object with the record's properties plus `getUpdatedTime()` and `getExpiresAt()`. 
+ +:::caution Common gotchas + +- **`/Table` vs `/Table/`** — `GET /Table` returns metadata about the table resource itself. `GET /Table/` (trailing slash) targets the collection and invokes `get()` as a collection request. These are distinct endpoints. +- **Case sensitivity** — The URL path must match the exact casing of the exported resource or table name. `/Table/` works; `/table/` returns a 404. + +::: + +### `search(query: RequestTarget): AsyncIterable` + +Performs a query on the resource or table. Called by `get()` on collection requests. Can be overridden to define custom query behavior. The default implementation on tables queries by the `conditions`, `limit`, `offset`, `select`, and `sort` properties parsed from the URL. + +### `put(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP PUT requests. Writes the full record to the table, creating or replacing the existing record. + +```javascript +put(target, data) { + // validate or transform before saving + super.put(target, { ...data, status: data.status ?? 'active' }); +} +``` + +### `patch(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP PATCH requests. Merges `data` into the existing record, preserving any properties not included in `data`. + +Added in: v4.3.0 (CRDT support for individual property updates via PATCH) + +### `post(target: RequestTarget | Id, data: object): void | Response` + +Called for HTTP POST requests. Default behavior creates a new record. Override to implement custom actions. + +### `delete(target: RequestTarget | Id): void | Response` + +Called for HTTP DELETE requests. Default behavior deletes the record identified by `target`. + +### `update(target: RequestTarget, updates?: object): Updatable` + +Returns an `Updatable` instance providing mutable property access to a record. Any property changes on the `Updatable` are written to the database when the transaction commits. 
+ +```javascript +post(target, data) { + const record = this.update(target.id); + record.quantity = record.quantity - 1; + // saved automatically on transaction commit +} +``` + +#### `Updatable` class + +The `Updatable` class provides direct property access plus: + +##### `addTo(property: string, value: number)` + +Adds `value` to `property` using CRDT incrementation — safe for concurrent updates across threads and nodes. + +Added in: v4.3.0 + +```javascript +post(target, data) { + const record = this.update(target.id); + record.addTo('quantity', -1); // decrement safely across nodes +} +``` + +##### `subtractFrom(property: string, value: number)` + +Subtracts `value` from `property` using CRDT incrementation. + +##### `set(property: string, value: any): void` + +Sets a property to `value`. Equivalent to direct property assignment (`record.property = value`), but useful when the property name is dynamic. + +```javascript +const record = this.update(target.id); +record.set('status', 'active'); +``` + +##### `getProperty(property: string): any` + +Returns the current value of `property` from the record. Useful when the property name is dynamic or when you want an explicit read rather than direct property access. + +```javascript +const record = this.update(target.id); +const current = record.getProperty('status'); +``` + +##### `getUpdatedTime(): number` + +Returns the last updated time as milliseconds since epoch. + +##### `getExpiresAt(): number` + +Returns the expiration time, if one is set. + +### `publish(target: RequestTarget, message: object): void | Response` + +Called for MQTT publish commands. Default behavior records the message and notifies subscribers without changing the record's stored data. + +### `subscribe(subscriptionRequest?: SubscriptionRequest): Promise` + +Called for MQTT subscribe commands. Returns a `Subscription` — an `AsyncIterable` of messages/changes. 
+ +#### `SubscriptionRequest` options + +All properties are optional: + +| Property | Description | +| -------------------- | ---------------------------------------------------------------------------------------------- | +| `includeDescendants` | Include all updates with an id prefixed by the subscribed id (e.g. `sub/*`) | +| `startTime` | Start from a past time (catch-up of historical messages). Cannot be used with `previousCount`. | +| `previousCount` | Return the last N updates/messages. Cannot be used with `startTime`. | +| `omitCurrent` | Do not send the current/retained record as the first update. | + +### `connect(target: RequestTarget, incomingMessages?: AsyncIterable): AsyncIterable` + +Called for WebSocket and Server-Sent Events connections. `incomingMessages` is provided for WebSocket connections (not SSE). Returns an `AsyncIterable` of messages to send to the client. + +### `invalidate(target: RequestTarget)` + +Marks the specified record as invalid in a caching table, so it will be reloaded from the source on next access. + +### `allowStaleWhileRevalidate(entry, id): boolean` + +For caching tables: return `true` to serve the stale entry while revalidation happens concurrently; `false` to wait for the fresh value. + +Entry properties: + +- `version` — Timestamp/version from the source +- `localTime` — When the resource was last refreshed locally +- `expiresAt` — When the entry became stale +- `value` — The stale record value + +### `getUpdatedTime(): number` + +Returns the last updated time of the resource (milliseconds since epoch). + +### `wasLoadedFromSource(): boolean` + +For caching tables, indicates that this request was a cache miss and the data was loaded from the source resource. 
+ +### `getContext(): Context` + +Returns the current context, which includes: + +- `user` — User object with username, role, and authorization information +- `transaction` — The current transaction + +When triggered by HTTP, the context is the `Request` object with these additional properties: + +- `url` — Full local path including query string +- `method` — HTTP method +- `headers` — Request headers (access with `context.headers.get(name)`) +- `responseHeaders` — Response headers (set with `context.responseHeaders.set(name, value)`) +- `pathname` — Path without query string +- `host` — Host from the `Host` header +- `ip` — Client IP address +- `body` — Raw Node.js `Readable` stream (if a request body exists) +- `data` — Promise resolving to the deserialized request body +- `lastModified` — Controls the `ETag`/`Last-Modified` response header +- `requestContext` — (For source resources only) Context of the upstream resource making the data request + +### `operation(operationObject: object, authorize?: boolean): Promise` + +Executes a Harper operations API call using this table as the target. Set `authorize` to `true` to enforce current-user authorization. + +--- + +## Resource Static Methods + +Static methods are the preferred way to interact with tables and resources from application code. They handle transaction setup, access checks, and request parsing automatically. + +All instance methods have static equivalents that accept an `id` or `RequestTarget` as the first argument: + +### `get(target: RequestTarget | Id | Query, context?: Resource | Context)` + +Retrieve a record by primary key, or query for records. + +```javascript +// By primary key +const product = await Product.get(34); + +// By query object +const product = await Product.get({ id: 34, select: ['name', 'price'] }); + +// Iterate a collection query +for await (const record of Product.get({ conditions: [{ attribute: 'inStock', value: true }] })) { + // ... 
+} +``` + +### `put(target: RequestTarget | Id, record: object, context?): Promise` + +### `put(record: object, context?): Promise` + +Save a record (create or replace). The second form reads the primary key from the `record` object. + +### `create(record: object, context?): Promise` + +Create a new record with an auto-generated primary key. Returns the created record. Do not include a primary key in the `record` argument. + +Added in: v4.2.0 + +### `patch(target: RequestTarget | Id, updates: object, context?): Promise` + +Apply partial updates to an existing record. + +### `post(target: RequestTarget | Id, data: object, context?): Promise` + +Call the `post` instance method. Defaults to creating a new record. + +### `delete(target: RequestTarget | Id, context?): Promise` + +Delete a record. + +### `publish(target: RequestTarget | Id, message: object, context?): Promise` + +Publish a message to a record/topic. + +### `subscribe(subscriptionRequest?, context?): Promise` + +Subscribe to record changes or messages. + +### `search(query: RequestTarget | Query, context?): AsyncIterable` + +Query the table. See [Query Object](#query-object) below for available query options. + +### `setComputedAttribute(name: string, computeFunction: (record) => any)` + +Define the compute function for a `@computed` schema attribute. + +Added in: v4.4.0 + +```javascript +MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${record.lastName}`); +``` + +### `getRecordCount({ exactCount?: boolean }): Promise<{ recordCount: number, estimatedRange?: [number, number] }>` + +Returns the number of records in the table. By default returns an approximate (fast) count. Pass `{ exactCount: true }` for a precise count. + +Added in: v4.5.0 + +### `sourcedFrom(Resource, options?)` + +Configure a table to use another resource as its data source (caching behavior). When a record is not found locally, it is fetched from the source and cached. Writes are delegated to the source. 
+ +Options: + +- `expiration` — Default TTL in seconds +- `eviction` — Eviction time in seconds +- `scanInterval` — Period for scanning expired records + +### `parsePath(path, context, query)` + +Called by static methods when processing a URL path. Can be overridden to preserve the path directly as the primary key: + +```javascript +static parsePath(path) { + return path; // use full path as id, no parsing +} +``` + +### `directURLMapping` + +Set this static property to `true` to map the full URL (including query string) as the primary key, bypassing query parsing. + +Added in: v4.5.0 (documented in improved URL path parsing) + +```javascript +export class MyTable extends tables.MyTable { + static directURLMapping = true; +} +// GET /MyTable/test?foo=bar → primary key is 'test?foo=bar' +``` + +### `primaryKey` + +The name of the primary key attribute for the table. + +```javascript +const record = await Table.get(34); +record[Table.primaryKey]; // → 34 +``` + +### `isCollection(resource): boolean` + +Returns `true` if the resource instance represents a collection (query result) rather than a single record. + +--- + +## Query Object + +The `Query` object is accepted by `search()` and the static `get()` method. + +### `conditions` + +Array of condition objects to filter records. Each condition: + +| Property | Description | +| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `attribute` | Property name, or an array for chained/joined properties (e.g. 
`['brand', 'name']`) | +| `value` | The value to match | +| `comparator` | `equals` (default), `greater_than`, `greater_than_equal`, `less_than`, `less_than_equal`, `starts_with`, `contains`, `ends_with`, `between`, `not_equal` | +| `conditions` | Nested conditions array | +| `operator` | `and` (default) or `or` for the nested `conditions` | + +Example with nested conditions: + +```javascript +Product.search({ + conditions: [ + { attribute: 'price', comparator: 'less_than', value: 100 }, + { + operator: 'or', + conditions: [ + { attribute: 'rating', comparator: 'greater_than', value: 4 }, + { attribute: 'featured', value: true }, + ], + }, + ], +}); +``` + +**Chained attribute references** (for relationships/joins): Use an array to traverse relationship properties: + +```javascript +Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); +``` + +Added in: v4.3.0 + +### `operator` + +Top-level `and` (default) or `or` for the `conditions` array. + +### `limit` + +Maximum number of records to return. + +### `offset` + +Number of records to skip (for pagination). + +### `select` + +Properties to include in each returned record. Can be: + +- Array of property names: `['name', 'price']` +- Nested select for related records: `[{ name: 'brand', select: ['id', 'name'] }]` +- String to return a single property per record: `'id'` + +Special properties: + +- `$id` — Returns the primary key regardless of its name +- `$updatedtime` — Returns the last-updated timestamp + +### `sort` + +Sort order object: + +| Property | Description | +| ------------ | ---------------------------------------------------------- | +| `attribute` | Property name (or array for chained relationship property) | +| `descending` | Sort descending if `true` (default: `false`) | +| `next` | Secondary sort to resolve ties (same structure) | + +### `explain` + +If `true`, returns conditions reordered as Harper will execute them (for debugging and optimization). 
+ +### `enforceExecutionOrder` + +If `true`, forces conditions to execute in the order supplied, disabling Harper's automatic re-ordering optimization. + +--- + +## RequestTarget + +`RequestTarget` represents a URL path mapped to a resource. It is a subclass of `URLSearchParams`. + +Properties: + +- `pathname` — Path relative to the resource, without query string +- `search` — The query/search string portion of the URL +- `id` — Primary key derived from the path +- `isCollection` — `true` when the request targets a collection +- `checkPermission` — Set to indicate authorization should be performed; has `action`, `resource`, and `user` sub-properties + +Standard `URLSearchParams` methods are available: + +- `get(name)`, `getAll(name)`, `set(name, value)`, `append(name, value)`, `delete(name)`, `has(name)` +- Iterable: `for (const [name, value] of target) { ... }` + +When a URL uses Harper's extended query syntax, these are parsed onto the target: + +- `conditions`, `limit`, `offset`, `sort`, `select` + +--- + +## RecordObject + +The `get()` method returns a `RecordObject` — a frozen plain object with all record properties, plus: + +- `getUpdatedTime(): number` — Last updated time (milliseconds since epoch) +- `getExpiresAt(): number` — Expiration time, if set + +--- + +## Response Object + +Resource methods can return: + +1. **Plain data** — serialized using content negotiation +2. **`Response`-like object** with `status`, `headers`, and `data` or `body`: + +```javascript +// Redirect +return { status: 302, headers: { Location: '/new-location' } }; + +// Custom header with data +return { status: 200, headers: { 'X-Custom-Header': 'value' }, data: { message: 'ok' } }; +``` + +`body` must be a string, `Buffer`, Node.js stream, or `ReadableStream`. `data` is an object that will be serialized. + +Added in: v4.4.0 + +### Throwing Errors + +Uncaught errors are caught by the protocol handler. For REST, they produce error responses. 
Set `error.statusCode` to control the HTTP status: + +```javascript +if (!authorized) { + const error = new Error('Forbidden'); + error.statusCode = 403; + throw error; +} +``` + +--- + +## Context and Transactions + +Whenever you call other resources from within a resource method, pass `this` as the context argument to share the transaction and ensure atomicity: + +```javascript +export class BlogPost extends tables.BlogPost { + static loadAsInstance = false; + + post(target, data) { + // both writes share the same transaction + tables.Comment.put(data, this); + const post = this.update(target.id); + post.commentCount = (post.commentCount ?? 0) + 1; + } +} +``` + +See [JavaScript Environment — transaction](../components/javascript-environment.md#transactionfn) for explicitly starting transactions outside of request handlers. + +--- + +## Legacy Instance Binding (V1) + +This documents the legacy `loadAsInstance = true` (or default pre-V2) behavior. The V2 API is recommended for all new code. + +When `loadAsInstance` is not `false` (or is explicitly `true`): + +- `this` is pre-bound to the matching record when instance methods are called. +- `this.getId()` returns the current record's primary key. +- Instance properties map directly to the record's fields. +- `get(query)` and `put(data, query)` have arguments in the older order (no `target` first). +- `allowRead()`, `allowUpdate()`, `allowCreate()`, `allowDelete()` methods are used for authorization. 
+ +```javascript +export class MyExternalData extends Resource { + static loadAsInstance = true; + + async get() { + const response = await this.fetch(this.id); + return response; + } + + put(data) { + // write to external source + } + + delete() { + // delete from external source + } +} + +tables.MyCache.sourcedFrom(MyExternalData); +``` + +### Migration from V1 to V2 + +Updated `get`: + +```javascript +// V1 +async get(query) { + let id = this.getId(); + this.newProperty = 'value'; + return super.get(query); +} + +// V2 +static loadAsInstance = false; +async get(target) { + let id = target.id; + let record = await super.get(target); + return { ...record, newProperty: 'value' }; // record is frozen; spread to add properties +} +``` + +Updated authorization: + +```javascript +// V1 +allowRead(user) { + return !!user; +} + +// V2 +static loadAsInstance = false; +async get(target) { + if (!this.getContext().user) { + const error = new Error('Unauthorized'); + error.statusCode = 401; + throw error; + } + target.checkPermission = false; + return super.get(target); +} +``` + +Updated `post` (note reversed argument order): + +```javascript +// V1 +async post(data, query) { ... } + +// V2 +static loadAsInstance = false; +async post(target, data) { ... } // target is first +``` diff --git a/reference/rest/content-types.md b/reference/rest/content-types.md new file mode 100644 index 00000000..23638472 --- /dev/null +++ b/reference/rest/content-types.md @@ -0,0 +1,100 @@ +--- +title: Content Types +--- + + + + +# Content Types + +Harper supports multiple content types (MIME types) for both HTTP request bodies and response bodies. Harper follows HTTP standards: use the `Content-Type` request header to specify the encoding of the request body, and use the `Accept` header to request a specific response format. + +```http +Content-Type: application/cbor +Accept: application/cbor +``` + +All content types work with any standard Harper operation. 
+ +## Supported Formats + +### JSON — `application/json` + +JSON is the most widely used format, readable and easy to work with. It is well-supported across all HTTP tooling. + +**Limitations**: JSON does not natively support all Harper data types — binary data, `Date`, `Map`, and `Set` values require special handling. JSON also produces larger payloads than binary formats. + +**When to use**: Web development, debugging, interoperability with third-party clients, or when the standard JSON type set is sufficient. Pairing JSON with compression (`Accept-Encoding: gzip, br`) often yields compact network transfers due to favorable Huffman coding characteristics. + +### CBOR — `application/cbor` + +CBOR is the recommended format for most production use cases. It is a highly efficient binary format with native support for the full range of Harper data types, including binary data, typed dates, and explicit Maps/Sets. + +**Advantages**: Very compact encoding, fast serialization, native streaming support (indefinite-length arrays for optimal time-to-first-byte on query results). Well-standardized with growing ecosystem support. + +**When to use**: Production APIs, performance-sensitive applications, or any scenario requiring rich data types. + +### MessagePack — `application/x-msgpack` + +MessagePack is another efficient binary format similar to CBOR, with broader adoption in some ecosystems. It supports all Harper data types. + +**Limitations**: MessagePack does not natively support streaming arrays, so query results are returned as a concatenated sequence of MessagePack objects. Decoders must be prepared to handle a sequence of values rather than a single document. + +**When to use**: Systems with existing MessagePack support that don't have CBOR available, or when interoperability with MessagePack clients is required. CBOR is generally preferred when both are available. 
+ +### CSV — `text/csv` + +Comma-separated values format, suitable for data export and spreadsheet import/export. CSV lacks hierarchical structure and explicit typing. + +**When to use**: Ad-hoc data export, spreadsheet workflows, batch data processing. Not recommended for frequent or production API use. + +## Content Type via URL Extension + +As an alternative to the `Accept` header, responses can be requested in a specific format using file-style URL extensions: + +```http +GET /product/some-id.csv +GET /product/.msgpack?category=software +``` + +Using the `Accept` header is the recommended approach for clean, standard HTTP interactions. + +## Custom Content Types + +Harper's content type system is extensible. Custom handlers for any serialization format (XML, YAML, proprietary formats, etc.) can be registered in the [`contentTypes`](../components/javascript-environment.md#contenttypes) global Map. + +## Storing Arbitrary Content Types + +When a `PUT` or `POST` is made with a non-standard content type (e.g., `text/calendar`, `image/gif`), Harper stores the content as a record with `contentType` and `data` properties: + +```http +PUT /my-resource/33 +Content-Type: text/calendar + +BEGIN:VCALENDAR +VERSION:2.0 +... +``` + +This stores a record equivalent to: + +```json +{ "contentType": "text/calendar", "data": "BEGIN:VCALENDAR\nVERSION:2.0\n..." } +``` + +Retrieving a record that has `contentType` and `data` properties returns the response with the specified `Content-Type` and body. If the content type is not from the `text` family, the data is treated as binary (a Node.js `Buffer`). + +Use `application/octet-stream` for binary data or for uploading to a specific property: + +```http +PUT /my-resource/33/image +Content-Type: image/gif + +...image data... 
+``` + +## See Also + +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Headers](./headers.md) — Content negotiation headers +- [Querying](./querying.md) — URL query syntax diff --git a/reference/rest/headers.md b/reference/rest/headers.md new file mode 100644 index 00000000..818398eb --- /dev/null +++ b/reference/rest/headers.md @@ -0,0 +1,97 @@ +--- +title: REST Headers +--- + + + + +# REST Headers + +Harper's REST interface uses standard HTTP headers for content negotiation, caching, and performance instrumentation. + +## Response Headers + +These headers are included in all Harper REST API responses: + +| Header | Example Value | Description | +| --------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `server-timing` | `db;dur=7.165` | Duration of the operation in milliseconds. Follows the [Server-Timing](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Server-Timing) standard and can be consumed by network monitoring tools. | +| `content-type` | `application/json` | MIME type of the returned content, negotiated based on the `Accept` request header. | +| `etag` | `"abc123"` | Encoded version/last-modification time of the returned record. Used for conditional requests. | +| `location` | `/MyTable/new-id` | Returned on `POST` responses. Contains the path to the newly created record. | + +## Request Headers + +### Content-Type + +Specifies the format of the request body (for `PUT`, `PATCH`, `POST`): + +```http +Content-Type: application/json +Content-Type: application/cbor +Content-Type: application/x-msgpack +Content-Type: text/csv +``` + +See [Content Types](./content-types.md) for the full list of supported formats. 
+ +### Accept + +Specifies the preferred response format: + +```http +Accept: application/json +Accept: application/cbor +Accept: application/x-msgpack +Accept: text/csv +``` + +### If-None-Match + +Used for conditional GET requests. Provide the `ETag` value from a previous response to avoid re-fetching unchanged data: + +```http +GET /MyTable/123 +If-None-Match: "abc123" +``` + +If the record has not changed, Harper returns `304 Not Modified` with no body. This avoids serialization and network transfer overhead and works seamlessly with browser caches and external HTTP caches. + +### Accept-Encoding + +Harper supports standard HTTP compression. Including this header enables compressed responses: + +```http +Accept-Encoding: gzip, br +``` + +Compression is particularly effective for JSON responses. For binary formats like CBOR, compression provides diminishing returns compared to the already-compact encoding. + +### Authorization + +Credentials for authenticating requests. See [Security Overview](../security/overview.md) for details on supported authentication mechanisms (Basic, JWT, mTLS). + +### Sec-WebSocket-Protocol + +When connecting via WebSocket for MQTT, the sub-protocol must be set to `mqtt` as required by the MQTT specification: + +```http +Sec-WebSocket-Protocol: mqtt +``` + +## Content Type via URL Extension + +As an alternative to the `Accept` header, content types can be specified using file-style extensions in the URL path: + +```http +GET /product/some-id.csv +GET /product/.msgpack?category=software +``` + +This is not recommended for production use — prefer the `Accept` header for clean, standard HTTP interactions. 
+ +## See Also + +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Content Types](./content-types.md) — Supported encoding formats +- [Security Overview](../security/overview.md) — Authentication headers and mechanisms diff --git a/reference/rest/overview.md b/reference/rest/overview.md new file mode 100644 index 00000000..e38b2e4c --- /dev/null +++ b/reference/rest/overview.md @@ -0,0 +1,159 @@ +--- +title: REST Overview +--- + + + + + + +# REST Overview + +Added in: v4.2.0 + +Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation over HTTP, providing the best performance and HTTP interoperability with different clients. + +## How the REST Interface Works + +Harper's REST interface exposes database tables and custom resources as RESTful endpoints. Tables are **not** exported by default; they must be explicitly exported in a schema definition. The name of the exported resource defines the base of the endpoint path, served on the application HTTP server port (default `9926`). + +For more on defining schemas and exporting resources, see [Database / Schema](../database/schema.md). 
+ +## Configuration + +Enable the REST interface by adding the `rest` plugin to your application's `config.yaml`: + +```yaml +rest: true +``` + +**Options**: + +```yaml +rest: + lastModified: true # enables Last-Modified response header support + webSocket: false # disables automatic WebSocket support (enabled by default) +``` + +## URL Structure + +The REST interface follows a consistent URL structure: + +| Path | Description | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| `/my-resource` | Root path — returns a description of the resource (e.g., table metadata) | +| `/my-resource/` | Trailing slash indicates a collection — represents all records; append query parameters to search | +| `/my-resource/record-id` | A specific record identified by its primary key | +| `/my-resource/record-id/` | Trailing slash — the collection of records with the given id prefix | +| `/my-resource/record-id/with/multiple/parts` | Record id with multiple path segments | + +Changed in: v4.5.0 — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. + +## HTTP Methods + +REST operations map to HTTP methods following uniform interface principles: + +### GET + +Retrieve a record or perform a search. Handled by the resource's `get()` method. + +```http +GET /MyTable/123 +``` + +Returns the record with primary key `123`. + +```http +GET /MyTable/?name=Harper +``` + +Returns records matching `name=Harper`. See [Querying](./querying.md) for the full query syntax. + +```http +GET /MyTable/123.propertyName +``` + +Returns a single property of a record. Only works for properties declared in the schema. + +#### Conditional Requests and Caching + +GET responses include an `ETag` header encoding the record's version/last-modification time. 
Clients with a cached copy can include `If-None-Match` on subsequent requests. If the record hasn't changed, Harper returns `304 Not Modified` with no body — avoiding serialization and network transfer overhead. + +### PUT + +Create or replace a record with a specified primary key (upsert semantics). Handled by the resource's `put(record)` method. The stored record will exactly match the submitted body — any properties not included in the body are removed from the previous record. + +```http +PUT /MyTable/123 +Content-Type: application/json + +{ "name": "some data" } +``` + +Creates or replaces the record with primary key `123`. + +### POST + +Create a new record without specifying a primary key, or trigger a custom action. Handled by the resource's `post(data)` method. The auto-assigned primary key is returned in the `Location` response header. + +```http +POST /MyTable/ +Content-Type: application/json + +{ "name": "some data" } +``` + +### PATCH + +Partially update a record, merging only the provided properties (CRDT-style update). Unspecified properties are preserved. + +Added in: v4.3.0 + +```http +PATCH /MyTable/123 +Content-Type: application/json + +{ "status": "active" } +``` + +### DELETE + +Delete a specific record or all records matching a query. + +```http +DELETE /MyTable/123 +``` + +Deletes the record with primary key `123`. + +```http +DELETE /MyTable/?status=archived +``` + +Deletes all records matching `status=archived`. + +## Content Types + +Harper supports multiple content types for both request bodies and responses. Use the `Content-Type` header for request bodies and the `Accept` header to request a specific response format. + +See [Content Types](./content-types.md) for the full list of supported formats and encoding recommendations. + +## OpenAPI + +Added in: v4.3.0 + +Harper automatically generates an OpenAPI specification for all resources exported via a schema. 
The generated specification is available at:
+ +## Comparison Operators (FIQL) + +Harper uses [FIQL](https://datatracker.ietf.org/doc/html/draft-nottingham-atompub-fiql-00) syntax for comparison operators: + +| Operator | Meaning | +| -------------------- | -------------------------------------- | +| `==` | Equal | +| `=lt=` | Less than | +| `=le=` | Less than or equal | +| `=gt=` | Greater than | +| `=ge=` | Greater than or equal | +| `=ne=`, `!=` | Not equal | +| `=ct=` | Contains (strings) | +| `=sw=`, `==*` | Starts with (strings) | +| `=ew=` | Ends with (strings) | +| `=`, `===` | Strict equality (no type conversion) | +| `!==` | Strict inequality (no type conversion) | + +**Examples**: + +```http +GET /Product/?price=gt=100 +GET /Product/?price=le=20 +GET /Product/?name==Keyboard* +GET /Product/?category=software&price=gt=100&price=lt=200 +``` + +For date fields, colons must be URL-encoded as `%3A`: + +```http +GET /Product/?listDate=gt=2017-03-08T09%3A30%3A00.000Z +``` + +### Chained Conditions (Range) + +Omit the attribute name on the second condition to chain it against the same attribute: + +```http +GET /Product/?price=gt=100<=200 +``` + +Chaining supports `gt`/`ge` combined with `lt`/`le` for range queries. No other chaining combinations are currently supported. 
+ +### Type Conversion + +For FIQL comparators (`==`, `!=`, `=gt=`, etc.), Harper applies automatic type conversion: + +| Syntax | Behavior | +| ----------------------------------------- | ------------------------------------------- | +| `name==null` | Converts to `null` | +| `name==123` | Converts to number if attribute is untyped | +| `name==true` | Converts to boolean if attribute is untyped | +| `name==number:123` | Explicit number conversion | +| `name==boolean:true` | Explicit boolean conversion | +| `name==string:some%20text` | Keep as string with URL decode | +| `name==date:2024-01-05T20%3A07%3A27.955Z` | Explicit Date conversion | + +If the attribute specifies a type in the schema (e.g., `Float`), values are always converted to that type before searching. + +For strict operators (`=`, `===`, `!==`), no automatic type conversion is applied — the value is decoded as a URL-encoded string, and the attribute type (if declared in the schema) dictates type conversion. + +## Unions (OR Logic) + +Use `|` instead of `&` to combine conditions with OR logic: + +```http +GET /Product/?rating=5|featured=true +``` + +## Grouping + +Use parentheses or square brackets to control order of operations: + +```http +GET /Product/?rating=5|(price=gt=100&price=lt=200) +``` + +Square brackets are recommended when constructing queries from user input because standard URI encoding safely encodes `[` and `]` (but not `(`): + +```http +GET /Product/?rating=5&[tag=fast|tag=scalable|tag=efficient] +``` + +Constructing from JavaScript: + +```javascript +let url = `/Product/?rating=5&[${tags.map(encodeURIComponent).join('|')}]`; +``` + +Groups can be nested for complex conditions: + +```http +GET /Product/?price=lt=100|[rating=5&[tag=fast|tag=scalable|tag=efficient]&inStock=true] +``` + +## Query Functions + +Harper supports special query functions using call syntax, included in the query string separated by `&`. 
+ +### `select(properties)` + +Specify which properties to include in the response. + +| Syntax | Returns | +| -------------------------------------- | ------------------------------------------- | +| `?select(property)` | Values of a single property directly | +| `?select(property1,property2)` | Objects with only the specified properties | +| `?select([property1,property2])` | Arrays of property values | +| `?select(property1,)` | Objects with a single specified property | +| `?select(property{subProp1,subProp2})` | Nested objects with specific sub-properties | + +**Examples**: + +```http +GET /Product/?category=software&select(name) +GET /Product/?brand.name=Microsoft&select(name,brand{name}) +``` + +### `limit(end)` or `limit(start,end)` + +Limit the number of results returned, with an optional starting offset. + +```http +GET /Product/?rating=gt=3&inStock=true&select(rating,name)&limit(20) +GET /Product/?rating=gt=3&limit(10,30) +``` + +### `sort(property)` or `sort(+property,-property,...)` + +Sort results by one or more properties. Prefix `+` or no prefix = ascending; `-` = descending. Multiple properties break ties in order. + +```http +GET /Product/?rating=gt=3&sort(+name) +GET /Product/?sort(+rating,-price) +``` + +Added in: v4.3.0 + +## Relationships and Joins + +Added in: v4.3.0 + +Harper supports querying across related tables through dot-syntax chained attributes. Relationships must be defined in the schema using `@relation`. + +**Schema example**: + +```graphql +type Product @table @export { + id: Long @primaryKey + name: String + brandId: Long @indexed + brand: Brand @relation(from: "brandId") +} +type Brand @table @export { + id: Long @primaryKey + name: String + products: [Product] @relation(to: "brandId") +} +``` + +**Query by related attribute** (INNER JOIN behavior): + +```http +GET /Product/?brand.name=Microsoft +GET /Brand/?products.name=Keyboard +``` + +### Nested Select with Joins + +Relationship attributes are not included by default. 
Use `select()` to include them: + +```http +GET /Product/?brand.name=Microsoft&select(name,brand) +GET /Product/?brand.name=Microsoft&select(name,brand{name}) +GET /Product/?name=Keyboard&select(name,brand{name,id}) +``` + +When selecting without a filter on the related table, this acts as a LEFT JOIN — the relationship property is omitted if the foreign key is null or references a non-existent record. + +### Many-to-Many Relationships + +Many-to-many relationships can be modeled with an array of foreign key values, without a junction table: + +```graphql +type Product @table @export { + id: Long @primaryKey + name: String + resellerIds: [Long] @indexed + resellers: [Reseller] @relation(from: "resellerId") +} +``` + +```http +GET /Product/?resellers.name=Cool Shop&select(id,name,resellers{name,id}) +``` + +The array order of `resellerIds` is preserved when resolving the relationship. + +## Property Access via URL + +Changed in: v4.5.0 + +Access a specific property of a record by appending it with dot syntax to the record id: + +```http +GET /MyTable/123.propertyName +``` + +This only works for properties declared in the schema. As of v4.5.0, dots in URL paths are no longer interpreted as property access for undeclared properties, allowing URLs to generally include dots without being misinterpreted. + +## `directURLMapping` Option + +Added in: v4.5.0 + +Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](../database/schema.md) for configuration details. 
+ +## See Also + +- [REST Overview](./overview.md) — HTTP methods, URL structure, and caching +- [Headers](./headers.md) — Request and response headers +- [Content Types](./content-types.md) — Encoding formats +- [Database / Schema](../database/schema.md) — Defining schemas, relationships, and indexes diff --git a/reference/rest/server-sent-events.md b/reference/rest/server-sent-events.md new file mode 100644 index 00000000..bdffaa1f --- /dev/null +++ b/reference/rest/server-sent-events.md @@ -0,0 +1,64 @@ +--- +title: Server-Sent Events +--- + + + + +# Server-Sent Events + +Added in: v4.2.0 + +Harper supports Server-Sent Events (SSE), a simple and efficient mechanism for browser-based applications to receive real-time updates from the server over a standard HTTP connection. SSE is a one-directional transport — the server pushes events to the client, and the client has no way to send messages back on the same connection. + +## Connecting + +SSE connections are made by targeting a resource URL. By default, connecting to a resource path subscribes to changes for that resource and streams events as they occur. + +```javascript +let eventSource = new EventSource('https://server/my-resource/341', { + withCredentials: true, +}); + +eventSource.onmessage = (event) => { + let data = JSON.parse(event.data); +}; +``` + +The URL path maps to the resource in the same way as REST and WebSocket connections. Connecting to `/my-resource/341` subscribes to updates for the record with id `341` in the `my-resource` table (or custom resource). + +## `connect()` Handler + +SSE connections use the same `connect()` method as WebSockets on resource classes, with one key difference: since SSE is one-directional, `connect()` is called without an `incomingMessages` argument. 
+ +```javascript +export class MyResource extends Resource { + async *connect() { + // yield messages to send to the client + while (true) { + await someCondition(); + yield { event: 'update', data: { value: 42 } }; + } + } +} +``` + +The default `connect()` behavior subscribes to the resource and streams changes automatically. + +## When to Use SSE vs WebSockets + +| | SSE | WebSockets | +| --------------- | ------------------------------------- | -------------------------------- | +| Direction | Server → Client only | Bidirectional | +| Transport | Standard HTTP | HTTP upgrade | +| Browser support | Native `EventSource` API | Native `WebSocket` API | +| Use case | Live feeds, dashboards, notifications | Interactive real-time apps, MQTT | + +SSE is simpler to implement and has built-in reconnection in browsers. For scenarios requiring bidirectional communication, use [WebSockets](./websockets.md). + +## See Also + +- [WebSockets](./websockets.md) — Bidirectional real-time connections +- [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Resources](../resources/overview.md) — Custom resource API including `connect()` diff --git a/reference/rest/websockets.md b/reference/rest/websockets.md new file mode 100644 index 00000000..005b6795 --- /dev/null +++ b/reference/rest/websockets.md @@ -0,0 +1,106 @@ +--- +title: WebSockets +--- + + + + + +# WebSockets + +Added in: v4.2.0 + +Harper supports WebSocket connections through the REST interface, enabling real-time bidirectional communication with resources. WebSocket connections target a resource URL path — by default, connecting to a resource subscribes to changes for that resource. + +## Configuration + +WebSocket support is enabled automatically when the `rest` plugin is enabled. 
To disable it: + +```yaml +rest: + webSocket: false +``` + +## Connecting + +A WebSocket connection to a resource URL subscribes to that resource and streams change events: + +```javascript +let ws = new WebSocket('wss://server/my-resource/341'); +ws.onmessage = (event) => { + let data = JSON.parse(event.data); +}; +``` + +By default, `new WebSocket('wss://server/my-resource/341')` accesses the resource defined for `my-resource` with record id `341` and subscribes to it. When the record changes or a message is published to it, the WebSocket connection receives the update. + +## Custom `connect()` Handler + +WebSocket behavior is driven by the `connect(incomingMessages)` method on a resource class. The method must return an async iterable (or generator) that produces messages to send to the client. For more on implementing custom resources, see [Resource API](../resources/resource-api.md). + +**Simple echo server**: + +```javascript +export class Echo extends Resource { + async *connect(incomingMessages) { + for await (let message of incomingMessages) { + yield message; // echo each message back + } + } +} +``` + +**Using the default connect with event-style access**: + +The default `connect()` returns a convenient streaming iterable with: + +- A `send(message)` method for pushing outgoing messages +- A `close` event for cleanup on disconnect + +```javascript +export class Example extends Resource { + connect(incomingMessages) { + let outgoingMessages = super.connect(); + + let timer = setInterval(() => { + outgoingMessages.send({ greeting: 'hi again!' }); + }, 1000); + + incomingMessages.on('data', (message) => { + outgoingMessages.send(message); // echo incoming messages + }); + + outgoingMessages.on('close', () => { + clearInterval(timer); + }); + + return outgoingMessages; + } +} +``` + +## MQTT over WebSockets + +Harper also supports MQTT over WebSockets. 
The sub-protocol must be set to `mqtt` as required by the MQTT specification: + +```http +Sec-WebSocket-Protocol: mqtt +``` + +See [MQTT Overview](../mqtt/overview.md) for full MQTT documentation. + +## Message Ordering in Distributed Environments + +Harper prioritizes low-latency delivery in distributed (multi-node) environments. Messages are delivered to local subscribers immediately upon arrival — Harper does not delay messages for inter-node coordination. + +In a scenario where messages arrive out-of-order across nodes: + +- **Non-retained messages** (published without a `retain` flag): Every message is delivered to subscribers in the order received, even if out-of-order relative to other nodes. Good for use cases like chat where every message must be delivered. +- **Retained messages** (published with `retain`, or PUT/updated in the database): Only the message with the latest timestamp is kept as the "winning" record. Out-of-order older messages are not re-delivered. This ensures eventual consistency of the most recent record state across the cluster. Good for use cases like sensor readings where only the latest value matters. + +## See Also + +- [Server-Sent Events](./server-sent-events.md) — One-way real-time streaming +- [MQTT Overview](../mqtt/overview.md) — Full MQTT pub/sub documentation +- [REST Overview](./overview.md) — HTTP methods and URL structure +- [Resources](../resources/overview.md) — Custom resource API including `connect()` diff --git a/reference/security/api.md b/reference/security/api.md new file mode 100644 index 00000000..67c359f6 --- /dev/null +++ b/reference/security/api.md @@ -0,0 +1,23 @@ +--- +title: Security API +--- + + + + +# Security API + +Harper exposes security-related globals accessible in all component JavaScript modules without needing to import them. + +--- + +## `auth(username, password?): Promise` + +Returns the user object for the given username. 
If `password` is provided, it is verified before returning the user (throws on incorrect password). + +```javascript +const user = await auth('admin', 'secret'); +// user.role, user.username, etc. +``` + +This is useful for implementing custom authentication flows or verifying credentials in component code. For HTTP-level authentication configuration, see [Security Overview](./overview.md). diff --git a/reference/security/basic-authentication.md b/reference/security/basic-authentication.md new file mode 100644 index 00000000..45535cfd --- /dev/null +++ b/reference/security/basic-authentication.md @@ -0,0 +1,58 @@ +--- +id: basic-authentication +title: Basic Authentication +--- + + + +Available since: v4.1.0 + +Harper supports HTTP Basic Authentication. In the context of an HTTP transaction, [Basic Authentication](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/Authentication#basic_authentication_scheme) is the simplest authorization scheme which transmits credentials as username/password pairs encoded using base64. Importantly, this scheme does not encrypt credentials. If used over an insecure connection, such as HTTP, they are susceptible to being compromised. Only ever use Basic Authentication over secured connections, such as HTTPS. Even then, its better to upgrade to an encryption based authentication scheme or certificates. See [HTTP / TLS](../http/tls.md) for more information. + +## How It Works + +Each request must contain the `Authorization` header with a value if `Basic `, where `` is the Base64 encoding of the string `username:password`. 
Authorization: Basic <credentials>
HTTPS can be used with them, but clients must be configured to accept the invalid certificate. + +## Development Setup + +By default, HTTPS is disabled. HTTP is suitable for local development and trusted private networks. If you are developing on a remote server with requests traversing the Internet, enable HTTPS. + +To enable HTTPS, set `http.securePort` in `harperdb-config.yaml` and restart Harper: + +```yaml +http: + securePort: 9926 +``` + +Harper will use the auto-generated certificates from `/keys/`. + +## Production Setup + +For production, use certificates from your own CA or a public CA, with CNs that match the Fully Qualified Domain Name (FQDN) of your Harper node. + +### Option 1: Replace Harper Certificates + +Enable HTTPS and replace the certificate files: + +```yaml +http: + securePort: 9926 +tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +Either replace the files at `/keys/` in place, or update `tls.certificate` and `tls.privateKey` to point to your new files and restart Harper. + +The `operationsApi.tls` section is optional. If not set, Harper uses the values from the top-level `tls` section. You can specify different certificates for the Operations API: + +```yaml +operationsApi: + tls: + certificate: ~/hdb/keys/certificate.pem + privateKey: ~/hdb/keys/privateKey.pem +``` + +### Option 2: Nginx Reverse Proxy + +Instead of enabling HTTPS directly on Harper, use Nginx as a reverse proxy. Configure Nginx to handle HTTPS with certificates from your own CA or a public CA, then forward HTTP requests to Harper. + +This approach keeps Harper's HTTP interface internal while Nginx handles TLS termination. + +### Option 3: External Reverse Proxy / Load Balancer + +External services such as an AWS Elastic Load Balancer or Google Cloud Load Balancing can act as TLS-terminating reverse proxies. Configure the service to accept HTTPS connections and forward over a private network to Harper as HTTP. 
+ +These services typically include integrated certificate management. + +## mTLS Setup + +Mutual TLS (mTLS) requires both client and server to present certificates. To enable mTLS, provide a CA certificate that Harper will use to verify client certificates: + +```yaml +http: + mtls: + required: true +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +For full mTLS authentication details, see [mTLS Authentication](./mtls-authentication.md). + +## Certificate Verification + +Added in: v4.5.0 (certificate revocation); v4.7.0 (OCSP support) + +When using mTLS, enable certificate verification to ensure revoked certificates cannot authenticate even if still within their validity period: + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +Harper supports two industry-standard methods: + +**CRL (Certificate Revocation List)** + +- Downloaded and cached locally (24 hours by default) +- Fast verification after first download (no network requests) +- Best for high-volume verification and offline scenarios + +**OCSP (Online Certificate Status Protocol)** + +- Real-time query to the CA's OCSP responder +- Best for certificates without CRL distribution points +- Responses cached (1 hour by default) + +**Harper's approach: CRL-first with OCSP fallback** + +1. Checks CRL if available (fast, cached locally) +2. Falls back to OCSP if CRL is unavailable or fails +3. Applies the configured failure mode if both methods fail + +For full configuration options and troubleshooting, see [Certificate Verification](./certificate-verification.md). + +## Dynamic Certificate Management + +Added in: v4.4.0 + +Certificates — including CAs and private keys — can be dynamically managed without restarting Harper. + +## Multiple Certificate Authorities + +It is possible to use different certificates for the Operations API and the HTTP (custom application) API. 
For example, in scenarios where only your application endpoints need to be exposed to the Internet and the Operations API is reserved for administration, you may use a private CA for the Operations API and a public CA for your application certificates. + +Configure each separately: + +```yaml +# Top-level tls: used by HTTP/application endpoints +tls: + certificate: ~/hdb/keys/app-certificate.pem + privateKey: ~/hdb/keys/app-privateKey.pem + +# Operations API can use a separate cert +operationsApi: + tls: + certificate: ~/hdb/keys/ops-certificate.pem + privateKey: ~/hdb/keys/ops-privateKey.pem +``` + +## Renewing Certificates + +The `harper renew-certs` CLI command renews the auto-generated Harper certificates. See [CLI Commands](../cli/commands.md) for details. + +**Changes to TLS settings require a restart**, except where dynamic certificate management is used. diff --git a/reference/security/certificate-verification.md b/reference/security/certificate-verification.md new file mode 100644 index 00000000..e2ee6ad2 --- /dev/null +++ b/reference/security/certificate-verification.md @@ -0,0 +1,449 @@ +--- +id: certificate-verification +title: Certificate Verification +--- + + + + + +Added in: v4.5.0 + +Changed in: v4.7.0 (OCSP support added) + +Certificate verification (also called certificate revocation checking) ensures that revoked certificates cannot be used for mTLS authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date — due to compromise, employee departure, or other security concerns. + +## Overview + +When a client presents a certificate for mTLS authentication, Harper performs two levels of checks: + +1. 
**Certificate Validation** (always performed by Node.js TLS): + - Certificate signature is valid + - Certificate is issued by a trusted CA + - Certificate is within its validity period + - Certificate chain is properly formed + +2. **Certificate Revocation Checking** (optional, must be explicitly enabled): + - Certificate has not been revoked by the issuing CA + - Uses CRL and/or OCSP + +Revocation checking is **disabled by default**. + +## Revocation Checking Methods + +### CRL (Certificate Revocation List) + +A CRL is a digitally signed list of revoked certificates published by a Certificate Authority. + +**Advantages:** + +- Fast verification (cached locally) +- Works offline once downloaded +- Predictable bandwidth usage +- Good for high-volume verification +- No privacy concerns (no per-certificate queries) + +**How it works:** + +1. Harper downloads the CRL from the distribution point specified in the certificate. +2. The CRL is cached locally (24 hours by default). +3. Subsequent verifications check the cached CRL — very fast, no network requests. +4. The CRL is refreshed in the background before expiration. + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + crl: + timeout: 10000 # 10 seconds to download CRL + cacheTtl: 86400000 # Cache for 24 hours + gracePeriod: 86400000 # 24 hour grace period after nextUpdate + failureMode: fail-closed # Reject on CRL check failure +``` + +### OCSP (Online Certificate Status Protocol) + +OCSP provides real-time certificate status checking by querying the CA's OCSP responder. + +**Advantages:** + +- Real-time revocation status +- Smaller response size than CRL +- Good for certificates without CRL distribution points +- Works when CRL is unavailable + +**How it works:** + +1. Harper sends a request to the OCSP responder specified in the certificate. +2. The responder returns the current status: good, revoked, or unknown. +3. 
The response is cached (1 hour by default for success, 5 minutes for errors). + +**Configuration:** + +```yaml +http: + mtls: + certificateVerification: + ocsp: + timeout: 5000 # 5 seconds for OCSP response + cacheTtl: 3600000 # Cache successful responses for 1 hour + errorCacheTtl: 300000 # Cache errors for 5 minutes + failureMode: fail-closed # Reject on OCSP check failure +``` + +## Verification Strategy + +Harper uses a **CRL-first strategy with OCSP fallback**: + +1. **Check CRL** if available (fast; uses cached CRL; no network request if cached). +2. **Fall back to OCSP** if the certificate has no CRL distribution point, the CRL download fails, or the CRL is expired and cannot be refreshed. +3. **Apply failure mode** if both methods fail. + +This provides the best balance of performance, reliability, and security. + +## Configuration + +### Enable with Defaults + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +This enables CRL checking (10s timeout, 24h cache), OCSP checking (5s timeout, 1h cache), and fail-closed mode. + +### Custom Configuration + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed # Global setting + crl: + timeout: 15000 # 15 seconds for CRL download + cacheTtl: 43200000 # Cache CRLs for 12 hours + gracePeriod: 86400000 # 24 hour grace period + failureMode: fail-closed # CRL-specific setting + ocsp: + timeout: 8000 # 8 seconds for OCSP response + cacheTtl: 7200000 # Cache results for 2 hours + errorCacheTtl: 600000 # Cache errors for 10 minutes + failureMode: fail-closed # OCSP-specific setting +``` + +### CRL Only (No OCSP) + +```yaml +http: + mtls: + certificateVerification: + ocsp: false # Disable OCSP; CRL remains enabled +``` + +Only disable OCSP if all client certificates have CRL distribution points. Otherwise, certificates without CRL URLs won't be checked for revocation. 
+ +### OCSP Only (No CRL) + +```yaml +http: + mtls: + certificateVerification: + crl: false # Disable CRL; OCSP remains enabled +``` + +### Environment Variables + +All settings can be configured via environment variables: + +```bash +# Enable certificate verification +HTTP_MTLS_CERTIFICATEVERIFICATION=true + +# Global failure mode +HTTP_MTLS_CERTIFICATEVERIFICATION_FAILUREMODE=fail-closed + +# CRL settings +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL=true +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_TIMEOUT=15000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_CACHETTL=43200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_GRACEPERIOD=86400000 +HTTP_MTLS_CERTIFICATEVERIFICATION_CRL_FAILUREMODE=fail-closed + +# OCSP settings +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP=true +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_TIMEOUT=8000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_CACHETTL=7200000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_ERRORCACHETTL=600000 +HTTP_MTLS_CERTIFICATEVERIFICATION_OCSP_FAILUREMODE=fail-closed +``` + +For replication servers, use the `REPLICATION_` prefix instead of `HTTP_`. + +## Failure Modes + +### fail-closed (Recommended) + +**Default behavior.** Rejects connections when verification fails due to network errors, timeouts, or other operational issues. + +Use when: + +- Security is paramount +- You can tolerate false positives (rejecting valid certificates due to CA unavailability) +- Your CA infrastructure is highly available +- You're in a zero-trust environment + +```yaml +certificateVerification: + failureMode: fail-closed +``` + +### fail-open + +Allows connections when verification fails, but logs a warning. The connection is still rejected if the certificate is explicitly found to be revoked. 
+ +Use when: + +- Availability is more important than perfect security +- Your CA infrastructure may be intermittently unavailable +- You have other compensating controls +- You're gradually rolling out certificate verification + +```yaml +certificateVerification: + failureMode: fail-open +``` + +**Important:** Invalid signatures on CRLs always result in rejection regardless of failure mode, as this indicates potential tampering. + +## Performance Considerations + +### CRL Performance + +- **First verification**: Downloads CRL (10s timeout by default) +- **Subsequent verifications**: Instant (reads from cache) +- **Background refresh**: CRL is refreshed before expiration without blocking requests +- **Memory usage**: ~10–100KB per CRL depending on size +- **Network usage**: One download per CRL per `cacheTtl` period + +### OCSP Performance + +- **First verification**: OCSP query (5s timeout by default) +- **Subsequent verifications**: Reads from cache (1 hour default) +- **Memory usage**: Minimal (~1KB per cached response) +- **Network usage**: One query per unique certificate per `cacheTtl` period + +### Optimization Tips + +Increase CRL cache TTL for stable environments: + +```yaml + +... +crl: + cacheTtl: 172800000 # 48 hours +``` + +Increase OCSP cache TTL for long-lived connections: + +```yaml + +... +ocsp: + cacheTtl: 7200000 # 2 hours +``` + +Reduce grace period for tighter revocation enforcement: + +```yaml + +... 
+crl: + gracePeriod: 0 # No grace period +``` + +## Production Best Practices + +### High-Security Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-closed + crl: + timeout: 15000 + cacheTtl: 43200000 # 12 hours + gracePeriod: 0 # No grace period for strict enforcement + ocsp: + timeout: 8000 + cacheTtl: 3600000 # 1 hour +``` + +### High-Availability Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + failureMode: fail-open # Prioritize availability + crl: + timeout: 5000 + cacheTtl: 86400000 # 24 hours + gracePeriod: 86400000 # 24 hour grace period + ocsp: + timeout: 3000 + cacheTtl: 7200000 # 2 hours +``` + +### Performance-Critical Environments + +```yaml +http: + mtls: + required: true + certificateVerification: + crl: + cacheTtl: 172800000 # 48 hours + gracePeriod: 86400000 + ocsp: + cacheTtl: 7200000 # 2 hours + errorCacheTtl: 600000 +``` + +## Troubleshooting + +### Connection Rejected: Certificate Verification Failed + +**Cause:** Certificate was found to be revoked, or verification failed in fail-closed mode. + +**Solutions:** + +1. Check if the certificate is actually revoked in the CRL or OCSP responder. +2. Verify CA infrastructure is accessible. +3. Check timeout settings — increase if needed. +4. Temporarily switch to fail-open mode while investigating. + +### High Latency on First Connection + +**Cause:** CRL is being downloaded for the first time. + +**Solutions:** + +1. This is normal; only happens once per CRL per `cacheTtl` period. +2. Subsequent connections will be fast (cached CRL). +3. Increase CRL timeout if downloads are slow: + ```yaml + crl: + timeout: 20000 # 20 seconds + ``` + +### Frequent CRL Downloads + +**Cause:** `cacheTtl` is too short, or the CRL's `nextUpdate` period is very short. + +**Solutions:** + +1. Increase `cacheTtl`: + ```yaml + crl: + cacheTtl: 172800000 # 48 hours + ``` +2. Increase `gracePeriod` to allow using slightly expired CRLs. 
+ +### OCSP Responder Unavailable + +**Cause:** OCSP responder is down or unreachable. + +**Solutions:** + +1. CRL will be used as fallback automatically. +2. Use fail-open mode to allow connections: + ```yaml + ocsp: + failureMode: fail-open + ``` +3. Disable OCSP and rely on CRL only (ensure all certs have CRL URLs): + ```yaml + ocsp: false + ``` + +### Network or Firewall Blocking Outbound Requests + +**Cause:** Secure hosting environments often restrict outbound HTTP/HTTPS traffic. This prevents Harper from reaching CRL distribution points and OCSP responders. + +**Symptoms:** + +- Certificate verification timeouts in fail-closed mode +- Logs show connection failures to CRL/OCSP URLs +- First connection may succeed (no cached data), but subsequent connections fail after cache expires + +**Solutions:** + +1. **Allow outbound traffic to CA infrastructure** (recommended): + - Whitelist CRL distribution point URLs from your certificates + - Whitelist OCSP responder URLs from your certificates + - Example for Let's Encrypt: allow `http://x1.c.lencr.org/` and `http://ocsp.int-x3.letsencrypt.org/` + +2. **Use fail-open mode:** + + ```yaml + certificateVerification: + failureMode: fail-open + ``` + +3. **Set up an internal CRL mirror/proxy:** + + ```yaml + certificateVerification: + crl: + cacheTtl: 172800000 # 48 hours + ocsp: false + ``` + +4. **Disable verification** (if you have alternative security controls): + ```yaml + certificateVerification: false + ``` + +## Security Considerations + +Enable certificate verification when: + +- Certificates have long validity periods (> 1 day) +- You need immediate revocation capability +- Compliance requires revocation checking (PCI DSS, HIPAA, etc.) 
+- You're in a zero-trust security model +- Client certificates are used for API authentication + +Consider skipping it when: + +- Certificates have very short validity periods (< 24 hours) +- You rotate certificates automatically (e.g., with cert-manager) +- You have alternative revocation mechanisms +- Your CA doesn't publish CRLs or support OCSP + +Certificate verification is one layer of security. Also consider: short certificate validity periods, certificate pinning, network segmentation, access logging, and regular certificate rotation. + +## Replication + +Certificate verification works identically for replication servers. Use the `replication.mtls` configuration: + +```yaml +replication: + hostname: server-one + routes: + - server-two + mtls: + certificateVerification: true +``` + +mTLS is always required for replication and cannot be disabled. This configuration only controls whether certificate revocation checking is performed. + +For complete replication configuration, see [Replication Configuration](../replication/clustering.md). diff --git a/reference/security/configuration.md b/reference/security/configuration.md new file mode 100644 index 00000000..717bc180 --- /dev/null +++ b/reference/security/configuration.md @@ -0,0 +1,69 @@ +--- +id: configuration +title: Authentication Configuration +--- + + + +Harper's authentication system is configured via the top-level `authentication` section of `harperdb-config.yaml`. + +```yaml +authentication: + authorizeLocal: true + cacheTTL: 30000 + enableSessions: true + operationTokenTimeout: 1d + refreshTokenTimeout: 30d + hashFunction: sha256 +``` + +## Options + +### `authorizeLocal` + +_Type: boolean — Default: `true`_ + +Automatically authorizes requests from the loopback IP address (`127.0.0.1`) as the superuser, without requiring credentials. 
Disable this for any Harper server that may be accessed by untrusted users from the same instance — for example, when using a local proxy or for general server hardening. + +### `cacheTTL` + +_Type: number — Default: `30000`_ + +How long (in milliseconds) an authentication result — a particular `Authorization` header or token — can be cached. Increasing this improves performance at the cost of slower revocation. + +### `enableSessions` + +_Type: boolean — Default: `true`_ + +Added in: v4.2.0 + +Enables cookie-based sessions to maintain an authenticated session across requests. This is the preferred authentication mechanism for web browsers: cookies hold the token securely without exposing it to JavaScript, reducing XSS vulnerability risk. + +### `operationTokenTimeout` + +_Type: string — Default: `1d`_ + +How long a JWT operation token remains valid before expiring. Accepts [`jsonwebtoken`-compatible](https://github.com/auth0/node-jsonwebtoken#token-expiration-exp-claim) duration strings (e.g., `1d`, `12h`, `60m`). See [JWT Authentication](./jwt-authentication.md). + +### `refreshTokenTimeout` + +_Type: string — Default: `30d`_ + +How long a JWT refresh token remains valid before expiring. Accepts [`jsonwebtoken`-compatible](https://github.com/auth0/node-jsonwebtoken#token-expiration-exp-claim) duration strings. See [JWT Authentication](./jwt-authentication.md). + +### `hashFunction` + +_Type: string — Default: `sha256`_ + +Added in: v4.5.0 + +Password hashing algorithm used when storing user passwords. Replaced the previous MD5 hashing. Options: + +- **`sha256`** — Default. Good security and excellent performance. +- **`argon2id`** — Highest security. More CPU-intensive; recommended for environments that do not require frequent password verifications. 
+ +## Related + +- [JWT Authentication](./jwt-authentication.md) +- [Basic Authentication](./basic-authentication.md) +- [Users & Roles / Configuration](../users-and-roles/configuration.md) diff --git a/reference/security/jwt-authentication.md b/reference/security/jwt-authentication.md new file mode 100644 index 00000000..8a35124c --- /dev/null +++ b/reference/security/jwt-authentication.md @@ -0,0 +1,118 @@ +--- +id: jwt-authentication +title: JWT Authentication +--- + + + +Available since: v4.1.0 + +Harper supports token-based authentication using JSON Web Tokens (JWTs). Rather than sending credentials on every request, a client authenticates once and receives tokens that are used for subsequent requests. + +## Tokens + +JWT authentication uses two token types: + +- **`operation_token`** — Used to authenticate all Harper operations via a `Bearer` token `Authorization` header. Default expiry: 1 day. +- **`refresh_token`** — Used to obtain a new `operation_token` when the current one expires. Default expiry: 30 days. + +## Create Authentication Tokens + +Call `create_authentication_tokens` with your Harper credentials. No `Authorization` header is required for this operation. 
+ +```json +{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" +} +``` + +cURL example: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "operation": "create_authentication_tokens", + "username": "username", + "password": "password" + }' +``` + +Response: + +```json +{ + "operation_token": "", + "refresh_token": "" +} +``` + +## Using the Operation Token + +Pass the `operation_token` as a `Bearer` token in the `Authorization` header on subsequent requests: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data-raw '{ + "operation": "search_by_hash", + "schema": "dev", + "table": "dog", + "hash_values": [1], + "get_attributes": ["*"] + }' +``` + +## Refreshing the Operation Token + +When the `operation_token` expires, use the `refresh_token` to obtain a new one. Pass the `refresh_token` as the `Bearer` token: + +```bash +curl --location --request POST 'http://localhost:9925' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data-raw '{ + "operation": "refresh_operation_token" + }' +``` + +Response: + +```json +{ + "operation_token": "" +} +``` + +When both tokens have expired, call `create_authentication_tokens` again with your username and password. + +## Token Expiry Configuration + +Token timeouts are configurable in `harperdb-config.yaml` under the top-level `authentication` section: + +```yaml +authentication: + operationTokenTimeout: 1d # Default: 1 day + refreshTokenTimeout: 30d # Default: 30 days +``` + +Valid duration string values follow the [`jsonwebtoken` package format](https://github.com/auth0/node-jsonwebtoken#token-expiration-exp-claim) (e.g., `1d`, `12h`, `60m`). See [Security / Configuration](./configuration.md) for the full authentication config reference. 
+ +## When to Use JWT Auth + +JWT authentication is preferred over Basic Auth when: + +- You want to avoid sending credentials on every request +- Your client can store and manage tokens +- You have multiple sequential requests and want to avoid repeated credential encoding + +For simple or server-to-server scenarios, see [Basic Authentication](./basic-authentication.md). + +## Security Notes + +- Always use HTTPS in production to protect tokens in transit. See [HTTP / TLS](../http/tls.md). +- Store tokens securely; treat them like passwords. +- If a token is compromised, it will remain valid until it expires. Consider setting shorter `operationTokenTimeout` values in high-security environments. diff --git a/reference/security/mtls-authentication.md b/reference/security/mtls-authentication.md new file mode 100644 index 00000000..d1f202e9 --- /dev/null +++ b/reference/security/mtls-authentication.md @@ -0,0 +1,80 @@ +--- +id: mtls-authentication +title: mTLS Authentication +--- + + + + +Added in: v4.3.0 + +Harper supports Mutual TLS (mTLS) authentication for incoming HTTP connections. When enabled, the client must present a certificate signed by a trusted Certificate Authority (CA). If the certificate is valid and trusted, the connection is authenticated using the user whose username matches the `CN` (Common Name) from the client certificate's `subject`. + +## How It Works + +1. The client presents a TLS certificate during the handshake. +2. Harper validates the certificate against the configured CA (`tls.certificateAuthority`). +3. If valid, Harper extracts the `CN` from the certificate subject and uses it as the username for the request. + 1. Or it is configurable via the `http.mtls.user` option in the relevant configuration object. +4. Optionally, Harper checks whether the certificate has been revoked (see [Certificate Verification](./certificate-verification.md)). + +## Configuration + +mTLS is configured via the `http.mtls` section in `harperdb-config.yaml`. 
+ +**Require mTLS for all connections:** + +```yaml +http: + mtls: + required: true +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +**Make mTLS optional (accept both mTLS and non-mTLS connections):** + +```yaml +http: + mtls: + required: false +tls: + certificateAuthority: ~/hdb/keys/ca.pem +``` + +When `required` is `false`, clients that do not present a certificate will fall back to other authentication methods (Basic Auth or JWT). + +For more configuration information see the [HTTP / Configuration](../http/configuration.md) and [HTTP / TLS](../http/tls.md) sections. + +## Certificate Revocation Checking + +When using mTLS, you can optionally enable certificate revocation checking to ensure that revoked certificates cannot authenticate, even if they are otherwise valid and trusted. + +To enable: + +```yaml +http: + mtls: + required: true + certificateVerification: true +``` + +Certificate revocation checking is **disabled by default** and must be explicitly enabled. For full details on CRL and OCSP configuration, see [Certificate Verification](./certificate-verification.md). + +## User Identity + +The username for the mTLS-authenticated request is derived from the `CN` field of the client certificate's subject. Ensure the CN value matches an existing Harper user account. See [Users and Roles](../users-and-roles/overview.md) for managing user accounts. + +## Setup Requirements + +To use mTLS you need: + +1. A Certificate Authority (CA) certificate configured in `tls.certificateAuthority`. +2. Client certificates signed by that CA, with a `CN` matching a Harper username. +3. The `http.mtls` configuration enabled. + +For help generating and managing certificates, see [Certificate Management](./certificate-management.md). + +## Replication + +mTLS is always required for Harper replication and cannot be disabled. For replication-specific mTLS configuration, see [Replication Configuration](../replication/clustering.md). 
diff --git a/reference/security/overview.md b/reference/security/overview.md new file mode 100644 index 00000000..e6abc3d8 --- /dev/null +++ b/reference/security/overview.md @@ -0,0 +1,55 @@ +--- +id: overview +title: Security +--- + + + + +Harper uses role-based, attribute-level security to ensure that users can only gain access to the data they are supposed to be able to access. Granular permissions allow for unparalleled flexibility and control, and can lower the total cost of ownership compared to other database solutions, since you no longer need to replicate subsets of data to isolate use cases. + +## Security Philosophy + +Harper's security model has two distinct layers: + +**Authentication** determines _who_ is making a request. Harper validates each request using one of the methods below, then resolves the caller to a known Harper user account. + +**Authorization** determines _what_ the caller can do. Each Harper user is assigned a role. Roles carry a permissions set that grants or denies CRUD access at the table and attribute level, in addition to controlling access to system operations. + +For details on how roles and permissions work, see [Users and Roles](../users-and-roles/overview.md). + +## Authentication Methods + +Harper supports three authentication methods: + +- [Basic Authentication](./basic-authentication.md) — Username and password sent as a Base64-encoded `Authorization` header on every request. +- [JWT Authentication](./jwt-authentication.md) — Token-based authentication using JSON Web Tokens. Clients authenticate once and receive short-lived operation tokens and longer-lived refresh tokens. +- [mTLS Authentication](./mtls-authentication.md) — Mutual TLS certificate-based authentication. + +## Certificate Management + +- [Certificate Management](./certificate-management.md) — Managing TLS certificates and Certificate Authorities for HTTPS and mTLS. 
+- [Certificate Verification](./certificate-verification.md) — Certificate revocation checking via CRL and OCSP. + +## Access Control + +- CORS — Cross-Origin Resource Sharing. + - For HTTP server configuration see [HTTP / Configuration / CORS](../http/configuration.md#cors) + - For Operations API configuration see [Operations API / Configuration](../configuration/operations.md) +- SSL & HTTPS — Enabling HTTPS and configuring TLS for the HTTP server. + - For HTTP server configuration see [HTTP / Configuration / TLS](../http/tls.md) + - For Operations API configuration see [Operations API / Configuration](../configuration/operations.md) +- [Users and Roles](../users-and-roles/overview.md) — Role-Based Access Control (RBAC): defining roles, assigning permissions, and managing users. + +## API + +- [Security API](./api.md) — JavaScript globals for security operations (e.g. `auth()`). + +## Default Behavior + +Out of the box, Harper: + +- Generates self-signed TLS certificates at `/keys/` on first run. +- Runs with HTTPS disabled (HTTP only on port 9925 for the Operations API). It is recommended that you never directly expose Harper's HTTP interface through a publicly available port. +- Enables CORS for all origins (configurable). +- Supports Basic Auth and JWT Auth by default; mTLS must be explicitly configured. diff --git a/reference/static-files/overview.md b/reference/static-files/overview.md new file mode 100644 index 00000000..2d0ea9f5 --- /dev/null +++ b/reference/static-files/overview.md @@ -0,0 +1,174 @@ +--- +id: overview +title: Static Files +--- + + + + + + + +# Static Files + +- Added in: v4.5.0 +- Changed in: v4.7.0 - (Migrated to Plugin API and new options added) + +The `static` built-in plugin serves static files from your Harper application over HTTP. Use it to host websites, SPAs, downloadable assets, or any static content alongside your Harper data and API endpoints. 
+ +`static` does **not** need to be installed — it is built into Harper and only needs to be declared in your `config.yaml`. + +## Basic Usage + +Configure `static` with the `files` option pointing to the files you want to serve: + +```yaml +static: + files: 'site/**' +``` + +Given a component with this structure: + +``` +my-app/ +├─ site/ +│ ├─ index.html +│ ├─ about.html +│ ├─ blog/ +│ ├─ post-1.html +│ ├─ post-2.html +├─ config.yaml +``` + +Files are accessed relative to the matched directory root, so `GET /index.html` returns `site/index.html` and `GET /blog/post-1.html` returns `site/blog/post-1.html`. + +## `files` and `urlPath` Options + +Added in: v4.5 + +`static` is a [Plugin](../components/overview.md) and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. + +Use `urlPath` to mount the files at a specific URL prefix: + +```yaml +static: + files: 'site/**' + urlPath: 'app' +``` + +Now `GET /app/index.html` returns `site/index.html` and `GET /app/blog/post-1.html` returns `site/blog/post-1.html`. + +See [Components Overview](../components/overview.md) for full `files` glob pattern and `urlPath` documentation. + +## Additional Options + +Added in: v4.7 + +In addition to the standard `files`, `urlPath`, and `timeout` options, `static` supports these configuration options: + +- **`index`** - `boolean` - _optional_ - If `true`, automatically serves `index.html` when a request targets a directory. Defaults to `false`. + +- **`extensions`** - `string[]` - _optional_ - File extensions to try when an exact path match is not found. For example, `extensions: ['html']` means a request for `/page-1` will also try `/page-1.html`. + +- **`fallthrough`** - `boolean` - _optional_ - If `true`, passes the request to the next handler when the requested file is not found. Set to `false` when using `notFound` to customize 404 responses. Defaults to `true`. 
+ +- **`notFound`** - `string | { file: string; statusCode: number }` - _optional_ - A custom file (or file + status code) to return when a path is not found. Useful for serving a custom 404 page or for SPAs that use client-side routing. + +## Auto-Updates + +Added in: v4.7.0 + +Because `static` uses the Plugin API, it automatically responds to changes without requiring a Harper restart. Adding, removing, or modifying files — or updating `config.yaml` — takes effect immediately. + +## Examples + +### Basic static file serving + +Serve all files in the `static/` directory. Requests must match file names exactly. + +```yaml +static: + files: 'static/**' +``` + +### Automatic `index.html` serving + +Serve `index.html` automatically when a request targets a directory: + +```yaml +static: + files: 'static/**' + index: true +``` + +With this structure: + +``` +my-app/ +├─ static/ +│ ├─ index.html +│ ├─ blog/ +│ ├─ index.html +│ ├─ post-1.html +``` + +Request mappings: + +``` +GET / -> static/index.html +GET /blog -> static/blog/index.html +GET /blog/post-1.html -> static/blog/post-1.html +``` + +### Automatic extension matching + +Combine `index` and `extensions` for clean URLs without file extensions: + +```yaml +static: + files: 'static/**' + index: true + extensions: ['html'] +``` + +Request mappings with the same structure: + +``` +GET / -> static/index.html +GET /blog -> static/blog/index.html +GET /blog/post-1 -> static/blog/post-1.html +``` + +### Custom 404 page + +Return a specific file when a requested path is not found: + +```yaml +static: + files: 'static/**' + notFound: 'static/404.html' + fallthrough: false +``` + +A request to `/non-existent` returns the contents of `static/404.html` with a `404` status code. + +> **Note:** When using `notFound`, set `fallthrough: false` so the request does not pass through to another handler before the custom 404 response is returned. 
+ +### SPA client-side routing + +For SPAs that handle routing in the browser, return the main application file for any unmatched path: + +```yaml +static: + files: 'static/**' + fallthrough: false + notFound: + file: 'static/index.html' + statusCode: 200 +``` + +A request to any unmatched path returns `static/index.html` with a `200` status code, allowing the client-side router to handle navigation. + +## Related + +- [Components Overview](../components/overview.md) diff --git a/reference/studio/overview.md b/reference/studio/overview.md new file mode 100644 index 00000000..c6096e3e --- /dev/null +++ b/reference/studio/overview.md @@ -0,0 +1,37 @@ +--- +title: Local Studio +--- + + + + +- Added in: v4.1.0 +- Changed in: v4.3.0 (Upgrade to match Cloud client) +- Changed in: v4.7.0 (Upgraded to match Fabric client) + +Harper Local Studio is a web-based GUI that enables you to administer, navigate, and monitor your Harper instance through a simple, user-friendly interface without requiring knowledge of the underlying Harper APIs. + +It is automatically bundled with all Harper instances and is enabled by default on the Operations API port. + +If you're looking for the platform as a service interface, go to [Harper Fabric](https://fabric.harper.fast) instead. + +## Configuration + +To enable the local Studio, set `localStudio.enabled` to `true` in your [configuration file](../configuration/options.md#localstudio): + +```yaml +localStudio: + enabled: true +``` + +The local studio is provided by the [Operations API](../operations-api/overview.md) and is available on the configured `operationsApi.port` or `operationsApi.securePort` values. This is `9925` by default. + +## Accessing Local Studio + +The local Studio can be accessed through your browser at: + +``` +http://localhost:9925 +``` + +All database interactions from the local Studio are made directly from your browser to your Harper instance. Authentication is maintained via session cookies. 
diff --git a/reference/users-and-roles/configuration.md b/reference/users-and-roles/configuration.md new file mode 100644 index 00000000..8177180b --- /dev/null +++ b/reference/users-and-roles/configuration.md @@ -0,0 +1,67 @@ +--- +id: configuration +title: Configuration +--- + + + + + +## Managing Roles with Config Files + +In addition to managing roles via the Operations API, Harper supports declaring roles in a configuration file. When the application starts, Harper ensures all declared roles exist with the specified permissions. + +Configure in your application's `config.yaml`: + +```yaml +roles: + files: roles.yaml +``` + +Example `roles.yaml`: + +```yaml +analyst: + super_user: false + data: + Sales: + read: true + insert: false + update: false + delete: false + +editor: + data: + Articles: + read: true + insert: true + update: true + attributes: + title: + read: true + update: true + author: + read: true + update: false +``` + +**Startup behavior:** + +- If a declared role does not exist, Harper creates it. +- If a declared role already exists, Harper updates its permissions to match the definition. + +## Password Hashing + +Added in: v4.5.0 + +Harper supports two password hashing algorithms, replacing the previous MD5 hashing: + +- **`sha256`** — Default algorithm. Good security and excellent performance. +- **`argon2id`** — Highest security. More CPU-intensive; recommended for high-security environments. + +Password hashing is configured via the `authentication.hashFunction` key in `harperdb-config.yaml`. See [Security / Configuration](../security/configuration.md#hashfunction) for details. 
+ +## Related + +- [Overview](./overview) +- [Operations](./operations) diff --git a/reference/users-and-roles/operations.md b/reference/users-and-roles/operations.md new file mode 100644 index 00000000..5dc7c56f --- /dev/null +++ b/reference/users-and-roles/operations.md @@ -0,0 +1,176 @@ +--- +id: operations +title: Operations +--- + + + +## Roles + +### List Roles + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "list_roles" +} +``` + +### Add Role + +_Restricted to `super_user` roles._ + +- `role` _(required)_ — Name for the new role. +- `permission` _(required)_ — Permissions object. See [Permission Structure](./overview#permission-structure). + - `super_user` _(optional)_ — If `true`, grants full access. Defaults to `false`. + - `structure_user` _(optional)_ — Boolean or array of database names. If `true`, can create/drop databases and tables. If array, limited to specified databases. + +```json +{ + "operation": "add_role", + "role": "developer", + "permission": { + "super_user": false, + "structure_user": false, + "dev": { + "tables": { + "dog": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [ + { + "attribute_name": "name", + "read": true, + "insert": true, + "update": true + } + ] + } + } + } + } +} +``` + +### Alter Role + +_Restricted to `super_user` roles._ + +- `id` _(required)_ — The `id` of the role to alter (from `list_roles`). +- `role` _(optional)_ — New name for the role. +- `permission` _(required)_ — Updated permissions object. + +```json +{ + "operation": "alter_role", + "id": "f92162e2-cd17-450c-aae0-372a76859038", + "role": "another_developer", + "permission": { + "super_user": false, + "structure_user": false, + "dev": { + "tables": { + "dog": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [] + } + } + } + } +} +``` + +### Drop Role + +_Restricted to `super_user` roles. 
Roles with associated users cannot be dropped._ + +- `id` _(required)_ — The `id` of the role to drop. + +```json +{ + "operation": "drop_role", + "id": "developer" +} +``` + +## Users + +### List Users + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "list_users" +} +``` + +### User Info + +Returns user data for the currently authenticated user. Available to all roles. + +```json +{ + "operation": "user_info" +} +``` + +### Add User + +_Restricted to `super_user` roles._ + +- `role` _(required)_ — Role name to assign. +- `username` _(required)_ — Username. Cannot be changed after creation. +- `password` _(required)_ — Plain-text password. Harper encrypts it on receipt. +- `active` _(required)_ — Boolean. If `false`, user cannot access Harper. + +```json +{ + "operation": "add_user", + "role": "role_name", + "username": "hdb_user", + "password": "password", + "active": true +} +``` + +### Alter User + +_Restricted to `super_user` roles._ + +- `username` _(required)_ — Username to modify. +- `password` _(optional)_ — New password. +- `role` _(optional)_ — New role name. +- `active` _(optional)_ — Enable/disable user access. + +```json +{ + "operation": "alter_user", + "role": "role_name", + "username": "hdb_user", + "password": "new_password", + "active": true +} +``` + +### Drop User + +_Restricted to `super_user` roles._ + +```json +{ + "operation": "drop_user", + "username": "harper" +} +``` + +## Related + +- [Overview](./overview) +- [Configuration](./configuration) diff --git a/reference/users-and-roles/overview.md b/reference/users-and-roles/overview.md new file mode 100644 index 00000000..bbb99cdc --- /dev/null +++ b/reference/users-and-roles/overview.md @@ -0,0 +1,253 @@ +--- +id: overview +title: Users & Roles +--- + + + + +Harper uses a Role-Based Access Control (RBAC) framework to manage access to Harper instances. Each user is assigned a role that determines their permissions to access database resources and run operations. 
+ +## Roles + +Role permissions in Harper are divided into two categories: + +**Database Manipulation** — CRUD (create, read, update, delete) permissions against database data (tables and attributes). + +**Database Definition** — Permissions to manage databases, tables, roles, users, and other system settings. These are restricted to the built-in `super_user` role. + +### Built-In Roles + +| Role | Description | +| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| `super_user` | Full access to all operations and methods. The admin role. | +| `structure_user` | Access to create and delete databases and tables. Can be set to `true` (all databases) or an array of database names (specific databases only). | + +### User-Defined Roles + +Admins (`super_user` users) can create custom roles with explicit permissions on specific tables and attributes. + +- Unless a user-defined role has `super_user: true`, all permissions must be defined explicitly. +- Any table or database not included in the role's permission set will be inaccessible. +- `describe` operations return metadata only for databases, tables, and attributes that the role has CRUD permissions for. 
+ +## Permission Structure + +When creating or altering a role, you define a `permission` object: + +```json +{ + "operation": "add_role", + "role": "software_developer", + "permission": { + "super_user": false, + "database_name": { + "tables": { + "table_name1": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [ + { + "attribute_name": "attribute1", + "read": true, + "insert": true, + "update": true + } + ] + }, + "table_name2": { + "read": true, + "insert": true, + "update": true, + "delete": false, + "attribute_permissions": [] + } + } + } + } +} +``` + +### Table Permissions + +Each table entry defines CRUD access: + +```jsonc +{ + "table_name": { + "read": boolean, // Access to read from this table + "insert": boolean, // Access to insert data + "update": boolean, // Access to update data + "delete": boolean, // Access to delete rows + "attribute_permissions": [ + { + "attribute_name": "attribute_name", + "read": boolean, + "insert": boolean, + "update": boolean + // Note: "delete" is not an attribute-level permission + } + ] + } +} +``` + +### Important Rules + +**Table-level:** + +- If a database or table is not included in the permissions, the role has no access to it. +- If a table-level CRUD permission is `false`, setting the same CRUD permission to `true` on an attribute returns an error. + +**Attribute-level:** + +- If `attribute_permissions` is a non-empty array, only the listed attributes are accessible (plus the table's hash attribute — see below). +- If `attribute_permissions` is empty (`[]`), attribute access follows the table-level CRUD permissions. +- If any non-hash attribute is given CRUD access, the table's `hash_attribute` (primary key) automatically receives the same access, even if not explicitly listed. +- Any attribute not explicitly listed in a non-empty `attribute_permissions` array has no access. +- `DELETE` is not an attribute-level permission. 
Deleting rows is controlled at the table level. +- The `__createdtime__` and `__updatedtime__` attributes managed by Harper can have `read` permissions set; other attribute-level permissions for these fields are ignored. + +## Role-Based Operation Restrictions + +The following table shows which operations are restricted to `super_user` roles. Non-`super_user` roles are also restricted within their accessible operations by their CRUD permission set. + +### Databases and Tables + +| Operation | Restricted to Super User | +| ------------------- | :----------------------: | +| `describe_all` | | +| `describe_database` | | +| `describe_table` | | +| `create_database` | X | +| `drop_database` | X | +| `create_table` | X | +| `drop_table` | X | +| `create_attribute` | | +| `drop_attribute` | X | + +### NoSQL Operations + +| Operation | Restricted to Super User | +| ---------------------- | :----------------------: | +| `insert` | | +| `update` | | +| `upsert` | | +| `delete` | | +| `search_by_hash` | | +| `search_by_value` | | +| `search_by_conditions` | | + +### SQL Operations + +| Operation | Restricted to Super User | +| --------- | :----------------------: | +| `select` | | +| `insert` | | +| `update` | | +| `delete` | | + +### Bulk Operations + +| Operation | Restricted to Super User | +| ---------------- | :----------------------: | +| `csv_data_load` | | +| `csv_file_load` | | +| `csv_url_load` | | +| `import_from_s3` | | + +### Users and Roles + +| Operation | Restricted to Super User | +| ------------ | :----------------------: | +| `list_roles` | X | +| `add_role` | X | +| `alter_role` | X | +| `drop_role` | X | +| `list_users` | X | +| `user_info` | | +| `add_user` | X | +| `alter_user` | X | +| `drop_user` | X | + +### Clustering + +| Operation | Restricted to Super User | +| ----------------------- | :----------------------: | +| `cluster_set_routes` | X | +| `cluster_get_routes` | X | +| `cluster_delete_routes` | X | +| `add_node` | X | +| `update_node` | X 
| +| `cluster_status` | X | +| `remove_node` | X | +| `configure_cluster` | X | + +### Components + +| Operation | Restricted to Super User | +| -------------------- | :----------------------: | +| `get_components` | X | +| `get_component_file` | X | +| `set_component_file` | X | +| `drop_component` | X | +| `add_component` | X | +| `package_component` | X | +| `deploy_component` | X | + +### Registration + +| Operation | Restricted to Super User | +| ------------------- | :----------------------: | +| `registration_info` | | +| `get_fingerprint` | X | +| `set_license` | X | + +### Jobs + +| Operation | Restricted to Super User | +| --------------------------- | :----------------------: | +| `get_job` | | +| `search_jobs_by_start_date` | X | + +### Logs + +| Operation | Restricted to Super User | +| -------------------------------- | :----------------------: | +| `read_log` | X | +| `read_transaction_log` | X | +| `delete_transaction_logs_before` | X | +| `read_audit_log` | X | +| `delete_audit_logs_before` | X | + +### Utilities + +| Operation | Restricted to Super User | +| ----------------------- | :----------------------: | +| `delete_records_before` | X | +| `export_local` | X | +| `export_to_s3` | X | +| `system_information` | X | +| `restart` | X | +| `restart_service` | X | +| `get_configuration` | X | + +### Token Authentication + +| Operation | Restricted to Super User | +| ------------------------------ | :----------------------: | +| `create_authentication_tokens` | | +| `refresh_operation_token` | | + +## Troubleshooting: "Must execute as User" + +If you see the error `Error: Must execute as <>`, it means Harper was installed as a specific OS user and must be run by that same user. Harper stores files natively on the operating system and only allows the Harper executable to be run by a single user — this prevents file permission issues and keeps the installation secure. + +To resolve: run Harper with the same OS user account used during installation. 
+ +## Related + +- [Configuration](./configuration) +- [Operations](./operations) From 5e84ecf03a583129c5b752e79369b94d5c4d4691 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 19:58:58 -0600 Subject: [PATCH 38/51] finish redirects --- CONTRIBUTING.md | 25 + docusaurus.config.ts | 1 - historic-redirects.ts | 1811 +++++++++++++++++++++++++++++ memory/part5-redirects.md | 53 +- package.json | 10 +- redirects.ts | 469 +++++--- scripts/harper-docs-analytics.csv | 1 - scripts/pageview-data-test.js | 215 ++++ scripts/postbuild.js | 118 -- sidebarsReference.ts | 495 +++++++- 10 files changed, 2882 insertions(+), 316 deletions(-) create mode 100644 historic-redirects.ts create mode 100644 scripts/pageview-data-test.js delete mode 100644 scripts/postbuild.js diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 69178c31..1bdfd7c2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,6 +44,31 @@ Depending on the specific change, you may need to make updates to similar files The site organization is ever evolving so make sure to revisit this file over time to stay up to date with the latest structure. +## Known Issues + +### `docusaurus serve` 404s on `/docs/4.X` paths + +`docusaurus serve` uses `serve-handler`, which treats ending URL path segments containing a singular dot (e.g. `4.6`) as file extensions rather than directory names. This causes `/docs/4.6` to 404 locally even though the redirect page exists at `build/docs/4.6/index.html`. This doesn't apply to nested paths such as `/docs/4.6/developers`. + +A fix has been submitted upstream at https://github.com/vercel/serve-handler/pull/230. Once it merges and Docusaurus upgrades its dependency, the local patch can be removed. + +In the meantime, if you need to test these redirects locally, apply a change in `node_modules/serve-handler/src/index.js` around line 608 where you clear the `stats` variable from `lstat` if it is a directory so it falls through to the nested `index.html`. 
+ +```js +if (path.extname(relativePath) !== '') { + try { + stats = await handlers.lstat(absolutePath); + if (stats && stats.isDirectory()) { + stats = null; + } + } catch (err) { + if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') { + return internalError(absolutePath, response, acceptsJSON, current, handlers, config, err); + } + } +} +``` + ## Release Notes Process When adding release notes for a new HarperDB version, follow these steps: diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 464af95d..609c6c0b 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -20,7 +20,6 @@ if (process.env.NODE_ENV === 'production') { const baseUrl = process.env.DOCUSAURUS_BASE_URL || '/'; // Determine route base path for docs -// Can be set to '/docs/' if we need docs under a subdirectory // Default is '/' to serve docs at the root const routeBasePath = process.env.DOCUSAURUS_ROUTE_BASE_PATH; // matching the production URL structure since this will currently affect some relative links in the docs diff --git a/historic-redirects.ts b/historic-redirects.ts new file mode 100644 index 00000000..b9119539 --- /dev/null +++ b/historic-redirects.ts @@ -0,0 +1,1811 @@ +// Historic redirect rules for versioned /docs/4.X/* paths +// +// All source paths are from GA pageview data (Oct 2025 – Feb 2026). +// These paths come from Docusaurus versioned docs that have been removed. +// All versioned /docs/4.X/* paths redirect to /reference/v4/*. +// +// This file is generated once and committed. It should not need to change +// unless new analytics data reveals missed paths. + +// NOTE: Future redirects should be added to redirects.ts instead. + +type RedirectRule = { + to: string; + from: string | string[]; +}; + +// ─── Segment mapping helpers ────────────────────────────────────────────────── +// Converts old doc segments to new /reference/v4/ equivalents. +// Applied after stripping the /docs/4.X prefix. 
+ +// Paths that are junk/artifacts we intentionally skip (no redirect): +// /~gitbook/pdf — GitBook PDF export URL, not a real page +// /docs/4.X/4.X/... — malformed double-version paths +// /docs/4.4./getting-started/ — typo path with extra dot + +export const historicRedirects: RedirectRule[] = [ + // ── Version roots ────────────────────────────────────────────────────────── + { from: ['/docs/4.1', '/docs/4.2', '/docs/4.3', '/docs/4.4', '/docs/4.5', '/docs/4.6'], to: '/reference/v4' }, + + // ── Getting Started ──────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/getting-started', + '/docs/4.2/getting-started', + '/docs/4.3/getting-started', + '/docs/4.4/getting-started', + '/docs/4.5/getting-started', + '/docs/4.6/getting-started', + ], + to: '/learn', + }, + { + from: [ + '/docs/4.4/getting-started/quickstart', + '/docs/4.5/getting-started/quickstart', + '/docs/4.6/getting-started/quickstart', + ], + to: '/learn', + }, + { + from: [ + '/docs/4.4/getting-started/installation', + '/docs/4.5/getting-started/installation', + '/docs/4.6/getting-started/installation', + ], + to: '/learn/getting-started/install-and-connect-harper', + }, + { + from: ['/docs/4.5/getting-started/install-harper', '/docs/4.5/getting-started/first-harper-app'], + to: '/learn/getting-started/install-and-connect-harper', + }, + { from: ['/docs/4.5/getting-started/harper-concepts', '/docs/4.6/getting-started/harper-concepts'], to: '/learn' }, + + // ── Foundations ─────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.4/foundations/core-concepts', + '/docs/4.5/foundations/core-concepts', + '/docs/4.6/foundations/core-concepts', + ], + to: '/learn', + }, + { + from: [ + '/docs/4.4/foundations/harper-architecture', + '/docs/4.5/foundations/harper-architecture', + '/docs/4.6/foundations/harper-architecture', + ], + to: '/learn', + }, + { + from: ['/docs/4.4/foundations/use-cases', '/docs/4.5/foundations/use-cases', 
'/docs/4.6/foundations/use-cases'], + to: '/learn', + }, + + // ── Operations API ───────────────────────────────────────────────────────── + { + from: ['/docs/4.1/operations-api', '/docs/4.2/operations-api', '/docs/4.3/operations-api'], + to: '/reference/v4/operations-api/overview', + }, + { + from: [ + '/docs/4.2/developers/operations-api', + '/docs/4.3/developers/operations-api', + '/docs/4.4/developers/operations-api', + '/docs/4.5/developers/operations-api', + '/docs/4.6/developers/operations-api', + ], + to: '/reference/v4/operations-api/overview', + }, + { + from: [ + '/docs/4.2/developers/operations-api/nosql-operations', + '/docs/4.3/developers/operations-api/nosql-operations', + '/docs/4.4/developers/operations-api/nosql-operations', + '/docs/4.5/developers/operations-api/nosql-operations', + '/docs/4.6/developers/operations-api/nosql-operations', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/databases-and-tables', + '/docs/4.3/developers/operations-api/databases-and-tables', + '/docs/4.4/developers/operations-api/databases-and-tables', + '/docs/4.5/developers/operations-api/databases-and-tables', + '/docs/4.6/developers/operations-api/databases-and-tables', + ], + to: '/reference/v4/database/overview', + }, + { + from: [ + '/docs/4.2/developers/operations-api/components', + '/docs/4.3/developers/operations-api/components', + '/docs/4.4/developers/operations-api/components', + '/docs/4.5/developers/operations-api/components', + '/docs/4.6/developers/operations-api/components', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/advanced-json-sql-examples', + '/docs/4.3/developers/operations-api/advanced-json-sql-examples', + '/docs/4.4/developers/operations-api/advanced-json-sql-examples', + '/docs/4.5/developers/operations-api/advanced-json-sql-examples', + '/docs/4.6/developers/operations-api/advanced-json-sql-examples', + ], + 
to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/bulk-operations', + '/docs/4.3/developers/operations-api/bulk-operations', + '/docs/4.4/developers/operations-api/bulk-operations', + '/docs/4.5/developers/operations-api/bulk-operations', + '/docs/4.6/developers/operations-api/bulk-operations', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/sql-operations', + '/docs/4.3/developers/operations-api/sql-operations', + '/docs/4.4/developers/operations-api/sql-operations', + '/docs/4.5/developers/operations-api/sql-operations', + '/docs/4.6/developers/operations-api/sql-operations', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.2/developers/operations-api/quickstart-examples', + '/docs/4.3/developers/operations-api/quickstart-examples', + '/docs/4.4/developers/operations-api/quickstart-examples', + '/docs/4.5/developers/operations-api/quickstart-examples', + '/docs/4.6/developers/operations-api/quickstart-examples', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/registration', + '/docs/4.3/developers/operations-api/registration', + '/docs/4.4/developers/operations-api/registration', + '/docs/4.5/developers/operations-api/registration', + '/docs/4.6/developers/operations-api/registration', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/token-authentication', + '/docs/4.3/developers/operations-api/token-authentication', + '/docs/4.4/developers/operations-api/token-authentication', + '/docs/4.5/developers/operations-api/token-authentication', + '/docs/4.6/developers/operations-api/token-authentication', + ], + to: '/reference/v4/security/jwt-authentication', + }, + { + from: [ + '/docs/4.2/developers/operations-api/users-and-roles', + '/docs/4.3/developers/operations-api/users-and-roles', + 
'/docs/4.4/developers/operations-api/users-and-roles', + '/docs/4.5/developers/operations-api/users-and-roles', + '/docs/4.6/developers/operations-api/users-and-roles', + ], + to: '/reference/v4/users-and-roles/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/jobs', + '/docs/4.3/developers/operations-api/jobs', + '/docs/4.4/developers/operations-api/jobs', + '/docs/4.5/developers/operations-api/jobs', + '/docs/4.6/developers/operations-api/jobs', + ], + to: '/reference/v4/database/jobs', + }, + { + from: [ + '/docs/4.2/developers/operations-api/logs', + '/docs/4.3/developers/operations-api/logs', + '/docs/4.4/developers/operations-api/logs', + '/docs/4.5/developers/operations-api/logs', + '/docs/4.6/developers/operations-api/logs', + ], + to: '/reference/v4/logging/operations', + }, + { + from: [ + '/docs/4.2/developers/operations-api/utilities', + '/docs/4.3/developers/operations-api/utilities', + '/docs/4.4/developers/operations-api/utilities', + '/docs/4.5/developers/operations-api/utilities', + ], + to: '/reference/v4/operations-api/operations', + }, + { + from: [ + '/docs/4.3/developers/operations-api/custom-functions', + '/docs/4.4/developers/operations-api/custom-functions', + '/docs/4.5/developers/operations-api/custom-functions', + '/docs/4.6/developers/operations-api/custom-functions', + ], + to: '/reference/v4/legacy/custom-functions', + }, + { + from: [ + '/docs/4.4/developers/operations-api/clustering', + '/docs/4.4/developers/operations-api/clustering-nats', + '/docs/4.4/developers/operations-api/clustering/clustering-nats', + '/docs/4.5/developers/operations-api/clustering', + '/docs/4.5/developers/operations-api/clustering-nats', + '/docs/4.6/developers/operations-api/clustering', + '/docs/4.6/developers/operations-api/clustering-nats', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: ['/docs/4.4/developers/operations-api/configuration', '/docs/4.6/developers/operations-api/configuration'], + to: 
'/reference/v4/configuration/operations', + }, + { from: '/docs/4.6/developers/operations-api/analytics', to: '/reference/v4/analytics/operations' }, + { + from: '/docs/4.6/developers/operations-api/certificate-management', + to: '/reference/v4/security/certificate-management', + }, + { from: '/docs/4.6/developers/operations-api/system-operations', to: '/reference/v4/operations-api/operations' }, + { + from: [ + '/docs/4.1/developers/operations-api/bulk-operations', + '/docs/4.1/developers/operations-api/clustering', + '/docs/4.1/developers/operations-api/jobs', + '/docs/4.1/developers/operations-api/logs', + '/docs/4.1/developers/operations-api/registration', + '/docs/4.1/developers/operations-api/sql-operations', + '/docs/4.1/developers/operations-api/token-authentication', + '/docs/4.1/developers/operations-api/users-and-roles', + '/docs/4.1/developers/operations-api/utilities', + ], + to: '/reference/v4/operations-api/operations', + }, + + // ── Applications ─────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/developers/applications', + '/docs/4.2/developers/applications', + '/docs/4.3/developers/applications', + '/docs/4.4/developers/applications', + '/docs/4.5/developers/applications', + '/docs/4.6/developers/applications', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.2/developers/applications/defining-schemas', + '/docs/4.3/developers/applications/defining-schemas', + '/docs/4.4/developers/applications/defining-schemas', + '/docs/4.5/developers/applications/defining-schemas', + '/docs/4.6/developers/applications/defining-schemas', + '/docs/4.1/developers/applications/defining-schemas', + ], + to: '/reference/v4/database/schema', + }, + { + from: [ + '/docs/4.2/developers/applications/caching', + '/docs/4.3/developers/applications/caching', + '/docs/4.4/developers/applications/caching', + '/docs/4.5/developers/applications/caching', + '/docs/4.6/developers/applications/caching', + ], + to: 
'/reference/v4/resources/overview', + }, + { from: '/docs/4.6/developers/applications/data-loader', to: '/reference/v4/database/data-loader' }, + { + from: [ + '/docs/4.4/developers/applications/web-applications', + '/docs/4.5/developers/applications/web-applications', + '/docs/4.6/developers/applications/web-applications', + ], + to: '/reference/v4/components/applications', + }, + { + from: [ + '/docs/4.2/developers/applications/debugging', + '/docs/4.3/developers/applications/debugging', + '/docs/4.4/developers/applications/debugging', + '/docs/4.5/developers/applications/debugging', + '/docs/4.6/developers/applications/debugging', + '/docs/4.1/developers/applications/debugging', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.2/developers/applications/define-routes', + '/docs/4.3/developers/applications/define-routes', + '/docs/4.4/developers/applications/define-routes', + '/docs/4.5/developers/applications/define-routes', + '/docs/4.6/developers/applications/define-routes', + '/docs/4.1/developers/applications/define-routes', + ], + to: '/reference/v4/fastify-routes/overview', + }, + { + from: [ + '/docs/4.4/developers/applications/defining-roles', + '/docs/4.5/developers/applications/defining-roles', + '/docs/4.6/developers/applications/defining-roles', + ], + to: '/reference/v4/users-and-roles/overview', + }, + { + from: [ + '/docs/4.2/developers/applications/example-projects', + '/docs/4.3/developers/applications/example-projects', + '/docs/4.4/developers/applications/example-projects', + '/docs/4.5/developers/applications/example-projects', + '/docs/4.6/developers/applications/example-projects', + ], + to: '/reference/v4/components/overview', + }, + + // ── Components (old /developers/components/*) ────────────────────────────── + { + from: [ + '/docs/4.1/developers/components', + '/docs/4.2/developers/components', + '/docs/4.3/developers/components', + '/docs/4.4/developers/components', + '/docs/4.5/developers/components', + 
'/docs/4.6/developers/components', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.1/developers/components/writing-extensions', + '/docs/4.2/developers/components/writing-extensions', + '/docs/4.3/developers/components/writing-extensions', + '/docs/4.4/developers/components/writing-extensions', + ], + to: '/reference/v4/components/extension-api', + }, + { + from: [ + '/docs/4.1/developers/components/drivers', + '/docs/4.2/developers/components/drivers', + '/docs/4.3/developers/components/drivers', + '/docs/4.1/developers/components/installing', + '/docs/4.2/developers/components/installing', + '/docs/4.3/developers/components/installing', + '/docs/4.4/developers/components/installing', + '/docs/4.2/developers/components/operations', + '/docs/4.3/developers/components/operations', + '/docs/4.4/developers/components/operations', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.2/developers/components/sdks', + '/docs/4.3/developers/components/sdks', + '/docs/4.4/developers/components/sdks', + '/docs/4.5/developers/miscellaneous/sdks', + '/docs/4.6/developers/miscellaneous/sdks', + '/docs/4.4/developers/miscellaneous/sdks', + '/docs/4.4/developers/miscellaneous', + '/docs/4.5/developers/miscellaneous', + '/docs/4.6/developers/miscellaneous', + '/docs/4.2/developers/components/google-data-studio', + '/docs/4.3/developers/components/google-data-studio', + '/docs/4.4/developers/miscellaneous/google-data-studio', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.4/developers/components/built-in', + '/docs/4.5/developers/components/built-in', + '/docs/4.4/developers/components/reference', + '/docs/4.5/developers/components/reference', + ], + to: '/reference/v4/components/extension-api', + }, + { + from: ['/docs/4.4/developers/components/managing', '/docs/4.5/developers/components/managing'], + to: '/reference/v4/components/overview', + }, + { + from: [ + 
'/docs/4.4/developers/miscellaneous/query-optimization', + '/docs/4.6/developers/miscellaneous/query-optimization', + ], + to: '/reference/v4/resources/query-optimization', + }, + + // ── Security ─────────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/developers/security', + '/docs/4.2/developers/security', + '/docs/4.3/developers/security', + '/docs/4.4/developers/security', + '/docs/4.5/developers/security', + '/docs/4.6/developers/security', + ], + to: '/reference/v4/security/overview', + }, + { + from: [ + '/docs/4.1/developers/security/configuration', + '/docs/4.2/developers/security/configuration', + '/docs/4.3/developers/security/configuration', + '/docs/4.4/developers/security/configuration', + '/docs/4.5/developers/security/configuration', + '/docs/4.6/developers/security/configuration', + ], + to: '/reference/v4/security/configuration', + }, + { + from: [ + '/docs/4.1/developers/security/users-and-roles', + '/docs/4.2/developers/security/users-and-roles', + '/docs/4.3/developers/security/users-and-roles', + '/docs/4.4/developers/security/users-and-roles', + '/docs/4.5/developers/security/users-and-roles', + '/docs/4.6/developers/security/users-and-roles', + ], + to: '/reference/v4/users-and-roles/overview', + }, + { + from: [ + '/docs/4.1/developers/security/jwt-auth', + '/docs/4.2/developers/security/jwt-auth', + '/docs/4.3/developers/security/jwt-auth', + '/docs/4.5/developers/security/jwt-auth', + '/docs/4.6/developers/security/jwt-auth', + ], + to: '/reference/v4/security/jwt-authentication', + }, + { + from: [ + '/docs/4.1/developers/security/basic-auth', + '/docs/4.2/developers/security/basic-auth', + '/docs/4.3/developers/security/basic-auth', + '/docs/4.5/developers/security/basic-auth', + '/docs/4.6/developers/security/basic-auth', + ], + to: '/reference/v4/security/basic-authentication', + }, + { + from: [ + '/docs/4.1/developers/security/certificate-management', + 
'/docs/4.2/developers/security/certificate-management', + '/docs/4.3/developers/security/certificate-management', + '/docs/4.4/developers/security/certificate-management', + '/docs/4.5/developers/security/certificate-management', + '/docs/4.6/developers/security/certificate-management', + ], + to: '/reference/v4/security/certificate-management', + }, + { + from: [ + '/docs/4.3/developers/security/mtls-auth', + '/docs/4.4/developers/security/mtls-auth', + '/docs/4.5/developers/security/mtls-auth', + '/docs/4.6/developers/security/mtls-auth', + ], + to: '/reference/v4/security/mtls-authentication', + }, + { from: ['/docs/4.1/security', '/docs/4.2/security', '/docs/4.3/security'], to: '/reference/v4/security/overview' }, + { + from: ['/docs/4.1/security/configuration', '/docs/4.2/security/configuration', '/docs/4.3/security/configuration'], + to: '/reference/v4/security/configuration', + }, + { + from: ['/docs/4.1/security/jwt-auth', '/docs/4.2/security/jwt-auth'], + to: '/reference/v4/security/jwt-authentication', + }, + { + from: ['/docs/4.1/security/basic-auth', '/docs/4.2/security/basic-auth'], + to: '/reference/v4/security/basic-authentication', + }, + { + from: ['/docs/4.1/security/certificate-management', '/docs/4.2/security/certificate-management'], + to: '/reference/v4/security/certificate-management', + }, + { + from: ['/docs/4.1/security/users-and-roles', '/docs/4.2/security/users-and-roles'], + to: '/reference/v4/users-and-roles/overview', + }, + + // ── Replication / Clustering ─────────────────────────────────────────────── + { + from: [ + '/docs/4.1/developers/replication', + '/docs/4.2/developers/replication', + '/docs/4.3/developers/replication', + '/docs/4.4/developers/replication', + '/docs/4.5/developers/replication', + '/docs/4.6/developers/replication', + ], + to: '/reference/v4/replication/overview', + }, + { + from: ['/docs/4.5/developers/replication/sharding', '/docs/4.6/developers/replication/sharding'], + to: 
'/reference/v4/replication/sharding', + }, + { + from: [ + '/docs/4.1/developers/clustering/certificate-management', + '/docs/4.2/developers/clustering/certificate-management', + '/docs/4.3/developers/clustering/certificate-management', + '/docs/4.1/developers/replication/clustering/certificate-management', + '/docs/4.2/developers/replication/clustering/certificate-management', + '/docs/4.3/developers/replication/clustering/certificate-management', + '/docs/4.4/developers/replication/clustering/creating-a-cluster-user', + '/docs/4.5/developers/replication/clustering/certificate-management', + '/docs/4.6/developers/replication/clustering/certificate-management', + ], + to: '/reference/v4/security/certificate-management', + }, + { + from: [ + '/docs/4.1/developers/clustering/enabling-clustering', + '/docs/4.2/developers/clustering/enabling-clustering', + '/docs/4.3/developers/clustering/enabling-clustering', + '/docs/4.4/developers/replication/clustering/enabling-clustering', + '/docs/4.4/developers/replication/clustering', + '/docs/4.4/developers/replication/clustering/managing-subscriptions', + '/docs/4.4/developers/replication/clustering/naming-a-node', + '/docs/4.4/developers/replication/clustering/establishing-routes', + '/docs/4.4/developers/replication/clustering/requirements-and-definitions', + '/docs/4.4/developers/replication/clustering/subscription-overview', + '/docs/4.4/developers/replication/clustering/things-worth-knowing', + '/docs/4.5/developers/replication/clustering/enabling-clustering', + '/docs/4.5/developers/replication/clustering/naming-a-node', + '/docs/4.5/developers/replication/clustering/subscription-overview', + '/docs/4.6/developers/replication/clustering/managing-subscriptions', + '/docs/4.6/developers/replication/clustering/things-worth-knowing', + '/docs/4.1/developers/clustering/managing-subscriptions', + '/docs/4.2/developers/clustering/creating-a-cluster-user', + '/docs/4.3/developers/clustering/creating-a-cluster-user', + 
'/docs/4.3/developers/clustering/establishing-routes', + '/docs/4.3/developers/clustering/managing-subscriptions', + '/docs/4.3/developers/clustering/naming-a-node', + '/docs/4.3/developers/clustering/requirements-and-definitions', + '/docs/4.3/developers/clustering/subscription-overview', + '/docs/4.3/developers/clustering/things-worth-knowing', + '/docs/4.3/developers/clustering', + '/docs/4.4/developers/clustering', + '/docs/4.4/developers/clustering/managing-subscriptions', + '/docs/4.4/developers/clustering/naming-a-node', + '/docs/4.4/developers/clustering/subscription-overview', + '/docs/4.4/developers/clustering/things-worth-knowing', + '/docs/4.5/developers/clustering', + '/docs/4.5/developers/clustering/certificate-management', + '/docs/4.5/developers/clustering/creating-a-cluster-user', + '/docs/4.5/developers/clustering/enabling-clustering', + '/docs/4.5/developers/clustering/establishing-routes', + '/docs/4.5/developers/clustering/managing-subscriptions', + '/docs/4.5/developers/clustering/naming-a-node', + '/docs/4.5/developers/clustering/requirements-and-definitions', + '/docs/4.5/developers/clustering/subscription-overview', + '/docs/4.5/developers/clustering/things-worth-knowing', + '/docs/4.6/developers/clustering', + '/docs/4.6/developers/clustering/managing-subscriptions', + '/docs/4.6/developers/clustering/naming-a-node', + '/docs/4.6/developers/clustering/things-worth-knowing', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: [ + '/docs/4.1/clustering', + '/docs/4.2/clustering/creating-a-cluster-user', + '/docs/4.2/clustering/things-worth-knowing', + '/docs/4.3/clustering/creating-a-cluster-user', + '/docs/4.1/clustering/certificate-management', + '/docs/4.1/clustering/creating-a-cluster-user', + '/docs/4.1/clustering/enabling-clustering', + '/docs/4.1/clustering/establishing-routes', + '/docs/4.1/clustering/managing-subscriptions', + '/docs/4.1/clustering/naming-a-node', + 
'/docs/4.1/clustering/requirements-and-definitions', + '/docs/4.1/clustering/subscription-overview', + '/docs/4.1/clustering/things-worth-knowing', + ], + to: '/reference/v4/replication/clustering', + }, + + // ── REST / Real-time ──────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/rest', + '/docs/4.2/rest', + '/docs/4.3/rest', + '/docs/4.2/developers/rest', + '/docs/4.3/developers/rest', + '/docs/4.4/developers/rest', + '/docs/4.5/developers/rest', + '/docs/4.6/developers/rest', + ], + to: '/reference/v4/rest/overview', + }, + { + from: [ + '/docs/4.1/developers/real-time', + '/docs/4.2/developers/real-time', + '/docs/4.3/developers/real-time', + '/docs/4.4/developers/real-time', + '/docs/4.5/developers/real-time', + '/docs/4.6/developers/real-time', + ], + to: '/reference/v4/rest/websockets', + }, + + // ── SQL Guide ───────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/sql-guide', + '/docs/4.2/sql-guide/delete', + '/docs/4.2/sql-guide/insert', + '/docs/4.3/sql-guide/delete', + '/docs/4.3/sql-guide/insert', + '/docs/4.3/sql-guide/select', + '/docs/4.1/developers/sql-guide', + '/docs/4.2/developers/sql-guide', + '/docs/4.3/developers/sql-guide', + '/docs/4.4/developers/sql-guide', + '/docs/4.5/developers/sql-guide', + '/docs/4.6/developers/sql-guide', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.1/developers/sql-guide/date-functions', + '/docs/4.2/developers/sql-guide/date-functions', + '/docs/4.3/developers/sql-guide/date-functions', + '/docs/4.4/developers/sql-guide/date-functions', + '/docs/4.5/developers/sql-guide/date-functions', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.1/developers/sql-guide/features-matrix', + '/docs/4.2/developers/sql-guide/features-matrix', + '/docs/4.3/developers/sql-guide/features-matrix', + '/docs/4.4/developers/sql-guide/features-matrix', + '/docs/4.5/developers/sql-guide/features-matrix', + ], + to: 
'/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.1/developers/sql-guide/functions', + '/docs/4.2/developers/sql-guide/functions', + '/docs/4.3/developers/sql-guide/functions', + '/docs/4.4/developers/sql-guide/functions', + '/docs/4.5/developers/sql-guide/functions', + '/docs/4.6/developers/sql-guide/functions', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.1/developers/sql-guide/sql-geospatial-functions', + '/docs/4.2/developers/sql-guide/sql-geospatial-functions', + '/docs/4.3/developers/sql-guide/sql-geospatial-functions', + '/docs/4.4/developers/sql-guide/sql-geospatial-functions', + '/docs/4.5/developers/sql-guide/sql-geospatial-functions', + '/docs/4.6/developers/sql-guide/sql-geospatial-functions', + '/docs/4.3/sql-guide/sql-geospatial-functions/geoequal', + '/docs/4.2/sql-guide/sql-geospatial-functions/geoconvert', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.2/developers/sql-guide/json-search', + '/docs/4.3/developers/sql-guide/json-search', + '/docs/4.5/reference/sql-guide/json-search', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.3/developers/sql-guide/reserved-word', + '/docs/4.4/reference/sql-guide/reserved-word', + '/docs/4.5/developers/sql-guide/reserved-word', + '/docs/4.5/reference/sql-guide/reserved-word', + '/docs/4.6/reference/sql-guide/reserved-word', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.4/reference/sql-guide', + '/docs/4.4/reference/sql-guide/features-matrix', + '/docs/4.4/reference/sql-guide/sql-geospatial-functions', + '/docs/4.5/reference/sql-guide', + '/docs/4.5/reference/sql-guide/date-functions', + '/docs/4.5/reference/sql-guide/features-matrix', + '/docs/4.5/reference/sql-guide/functions', + '/docs/4.5/reference/sql-guide/sql-geospatial-functions', + '/docs/4.6/reference/sql-guide', + '/docs/4.6/reference/sql-guide/date-functions', + '/docs/4.6/reference/sql-guide/features-matrix', + 
'/docs/4.6/reference/sql-guide/functions', + '/docs/4.6/reference/sql-guide/sql-geospatial-functions', + '/docs/4.1/sql-guide/date-functions', + '/docs/4.1/sql-guide/delete', + '/docs/4.1/sql-guide/features-matrix', + '/docs/4.1/sql-guide/functions', + '/docs/4.1/sql-guide/insert', + '/docs/4.1/sql-guide/joins', + '/docs/4.1/sql-guide/json-search', + '/docs/4.1/sql-guide/reserved-word', + '/docs/4.1/sql-guide/select', + '/docs/4.1/sql-guide/update', + '/docs/4.2/reference/sql-guide', + '/docs/4.2/reference/sql-guide/date-functions', + '/docs/4.2/reference/sql-guide/json-search', + '/docs/4.3/reference/sql-guide', + '/docs/4.3/reference/sql-guide/date-functions', + '/docs/4.3/reference/sql-guide/sql-geospatial-functions', + ], + to: '/reference/v4/database/sql', + }, + { + from: [ + '/docs/4.1/sql-guide/sql-geospatial-functions', + '/docs/4.1/sql-guide/sql-geospatial-functions/geoarea', + '/docs/4.1/sql-guide/sql-geospatial-functions/geocontains', + '/docs/4.1/sql-guide/sql-geospatial-functions/geoconvert', + '/docs/4.1/sql-guide/sql-geospatial-functions/geocrosses', + '/docs/4.1/sql-guide/sql-geospatial-functions/geodifference', + '/docs/4.1/sql-guide/sql-geospatial-functions/geodistance', + '/docs/4.1/sql-guide/sql-geospatial-functions/geoequal', + '/docs/4.1/sql-guide/sql-geospatial-functions/geolength', + '/docs/4.1/sql-guide/sql-geospatial-functions/geonear', + ], + to: '/reference/v4/database/sql', + }, + + // ── Configuration / Deployments ─────────────────────────────────────────── + { + from: [ + '/docs/4.1/configuration', + '/docs/4.2/deployments/configuration', + '/docs/4.3/deployments/configuration', + '/docs/4.4/deployments/configuration', + '/docs/4.5/deployments/configuration', + '/docs/4.6/deployments/configuration', + ], + to: '/reference/v4/configuration/overview', + }, + + // ── CLI ─────────────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/harperdb-cli', + '/docs/4.1/deployments/harperdb-cli', + 
'/docs/4.2/deployments/harperdb-cli', + '/docs/4.3/deployments/harperdb-cli', + '/docs/4.4/deployments/harper-cli', + '/docs/4.5/deployments/harper-cli', + '/docs/4.6/deployments/harper-cli', + ], + to: '/reference/v4/cli/overview', + }, + + // ── Install / Upgrade (no equivalent page, send to learn) ───────────────── + { + from: [ + '/docs/4.1/deployments/install-harperdb', + '/docs/4.1/deployments/install-harperdb/linux', + '/docs/4.1/install-harperdb', + '/docs/4.1/install-harperdb/linux', + '/docs/4.2/deployments/install-harperdb', + '/docs/4.2/deployments/install-harperdb/linux', + '/docs/4.2/install-harperdb', + '/docs/4.3/deployments/install-harperdb', + '/docs/4.3/deployments/install-harperdb/linux', + '/docs/4.3/install-harperdb', + '/docs/4.4/deployments/install-harperdb', + '/docs/4.4/deployments/install-harperdb/linux', + '/docs/4.4/deployments/install-harper', + '/docs/4.4/deployments/install-harper/linux', + '/docs/4.5/deployments/install-harper', + '/docs/4.5/deployments/install-harper/linux', + '/docs/4.6/deployments/install-harper', + '/docs/4.6/deployments/install-harper/linux', + '/docs/4.1/deployments/upgrade-hdb-instance', + '/docs/4.2/deployments/upgrade-hdb-instance', + '/docs/4.3/deployments/upgrade-hdb-instance', + '/docs/4.4/deployments/upgrade-hdb-instance', + '/docs/4.5/deployments/upgrade-hdb-instance', + '/docs/4.6/deployments/upgrade-hdb-instance', + '/docs/4.1/upgrade-hdb-instance', + ], + to: '/learn/getting-started/install-and-connect-harper', + }, + { from: '/docs/4.2/deployments/', to: '/reference/v4' }, + + // ── Cloud ───────────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/harperdb-cloud', + '/docs/4.1/harperdb-cloud/alarms', + '/docs/4.1/harperdb-cloud/iops-impact', + '/docs/4.1/harperdb-cloud/verizon-5g-wavelength-instances', + '/docs/4.2/deployments/harperdb-cloud', + '/docs/4.2/deployments/harperdb-cloud/alarms', + '/docs/4.2/deployments/harperdb-cloud/instance-size-hardware-specs', + 
'/docs/4.2/deployments/harperdb-cloud/iops-impact', + '/docs/4.2/deployments/harperdb-cloud/verizon-5g-wavelength-instances', + '/docs/4.3/deployments/harperdb-cloud', + '/docs/4.3/deployments/harperdb-cloud/alarms', + '/docs/4.3/deployments/harperdb-cloud/instance-size-hardware-specs', + '/docs/4.3/deployments/harperdb-cloud/iops-impact', + '/docs/4.3/deployments/harperdb-cloud/verizon-5g-wavelength-instances', + '/docs/4.4/deployments/harperdb-cloud/alarms', + '/docs/4.4/deployments/harperdb-cloud/instance-size-hardware-specs', + '/docs/4.4/deployments/harperdb-cloud/iops-impact', + '/docs/4.4/deployments/harperdb-cloud/verizon-5g-wavelength-instances', + '/docs/4.4/deployments/harper-cloud/', + '/docs/4.4/deployments/harper-cloud/alarms', + '/docs/4.4/deployments/harper-cloud/instance-size-hardware-specs', + '/docs/4.4/deployments/harper-cloud/iops-impact', + '/docs/4.4/deployments/harper-cloud/verizon-5g-wavelength-instances', + '/docs/4.5/deployments/harper-cloud/alarms', + '/docs/4.5/deployments/harper-cloud/verizon-5g-wavelength-instances', + '/docs/4.6/deployments/harper-cloud', + '/docs/4.6/deployments/harper-cloud/alarms', + '/docs/4.6/deployments/harper-cloud/instance-size-hardware-specs', + '/docs/4.6/deployments/harper-cloud/verizon-5g-wavelength-instances', + ], + to: '/reference/v4/legacy/cloud', + }, + + // ── Studio (harper-studio) ───────────────────────────────────────────────── + { + from: [ + '/docs/4.1/harperdb-studio', + '/docs/4.1/harperdb-studio/create-account', + '/docs/4.1/harperdb-studio/enable-mixed-content', + '/docs/4.1/harperdb-studio/instance-configuration', + '/docs/4.1/harperdb-studio/instance-example-code', + '/docs/4.1/harperdb-studio/instance-metrics', + '/docs/4.1/harperdb-studio/instances', + '/docs/4.1/harperdb-studio/login-password-reset', + '/docs/4.1/harperdb-studio/manage-charts', + '/docs/4.1/harperdb-studio/manage-clustering', + '/docs/4.1/harperdb-studio/manage-functions', + 
'/docs/4.1/harperdb-studio/manage-instance-roles', + '/docs/4.1/harperdb-studio/manage-instance-users', + '/docs/4.1/harperdb-studio/manage-schemas-browse-data', + '/docs/4.1/harperdb-studio/organizations', + '/docs/4.1/harperdb-studio/query-instance-data', + '/docs/4.1/harperdb-studio/resources', + '/docs/4.2/harperdb-studio', + '/docs/4.2/harperdb-studio/enable-mixed-content', + '/docs/4.2/harperdb-studio/manage-functions', + '/docs/4.2/harperdb-studio/manage-schemas-browse-data', + '/docs/4.3/harperdb-studio', + '/docs/4.3/harperdb-studio/enable-mixed-content', + '/docs/4.3/harperdb-studio/instance-metrics', + '/docs/4.3/harperdb-studio/login-password-reset', + '/docs/4.3/harperdb-studio/manage-schemas-browse-data', + '/docs/4.4/harperdb-studio/manage-schemas-browse-data', + '/docs/4.4/administration/harperdb-studio', + '/docs/4.4/administration/harperdb-studio/create-account', + '/docs/4.4/administration/harperdb-studio/enable-mixed-content', + '/docs/4.4/administration/harperdb-studio/instance-configuration', + '/docs/4.4/administration/harperdb-studio/instance-metrics', + '/docs/4.4/administration/harperdb-studio/instances', + '/docs/4.4/administration/harperdb-studio/login-password-reset', + '/docs/4.4/administration/harperdb-studio/manage-applications', + '/docs/4.4/administration/harperdb-studio/manage-charts', + '/docs/4.4/administration/harperdb-studio/manage-databases-browse-data', + '/docs/4.4/administration/harperdb-studio/manage-instance-roles', + '/docs/4.4/administration/harperdb-studio/manage-instance-users', + '/docs/4.4/administration/harperdb-studio/manage-replication', + '/docs/4.4/administration/harperdb-studio/organizations', + '/docs/4.4/administration/harperdb-studio/query-instance-data', + '/docs/4.5/administration/harperdb-studio', + '/docs/4.5/administration/harperdb-studio/manage-charts', + '/docs/4.5/administration/harperdb-studio/manage-clustering', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + 
'/docs/4.4/administration/harper-studio', + '/docs/4.4/administration/harper-studio/create-account', + '/docs/4.4/administration/harper-studio/instance-configuration', + '/docs/4.4/administration/harper-studio/instance-metrics', + '/docs/4.4/administration/harper-studio/instances', + '/docs/4.4/administration/harper-studio/login-password-reset', + '/docs/4.4/administration/harper-studio/manage-applications', + '/docs/4.4/administration/harper-studio/manage-databases-browse-data', + '/docs/4.4/administration/harper-studio/manage-instance-roles', + '/docs/4.4/administration/harper-studio/manage-instance-users', + '/docs/4.4/administration/harper-studio/manage-replication', + '/docs/4.4/administration/harper-studio/organizations', + '/docs/4.5/administration/harper-studio', + '/docs/4.5/administration/harper-studio/create-account', + '/docs/4.5/administration/harper-studio/enable-mixed-content', + '/docs/4.5/administration/harper-studio/instance-configuration', + '/docs/4.5/administration/harper-studio/instance-metrics', + '/docs/4.5/administration/harper-studio/instances', + '/docs/4.5/administration/harper-studio/login-password-reset', + '/docs/4.5/administration/harper-studio/manage-applications', + '/docs/4.5/administration/harper-studio/manage-databases-browse-data', + '/docs/4.5/administration/harper-studio/manage-instance-roles', + '/docs/4.5/administration/harper-studio/manage-instance-users', + '/docs/4.5/administration/harper-studio/manage-replication', + '/docs/4.5/administration/harper-studio/organizations', + '/docs/4.5/administration/harper-studio/query-instance-data', + '/docs/4.6/administration/harper-studio', + '/docs/4.6/administration/harper-studio/create-account', + '/docs/4.6/administration/harper-studio/enable-mixed-content', + '/docs/4.6/administration/harper-studio/instance-configuration', + '/docs/4.6/administration/harper-studio/instance-metrics', + '/docs/4.6/administration/harper-studio/instances', + 
'/docs/4.6/administration/harper-studio/manage-applications', + '/docs/4.6/administration/harper-studio/manage-databases-browse-data', + '/docs/4.6/administration/harper-studio/manage-instance-roles', + '/docs/4.6/administration/harper-studio/manage-instance-users', + '/docs/4.6/administration/harper-studio/manage-replication', + '/docs/4.6/administration/harper-studio/organizations', + '/docs/4.6/administration/harper-studio/query-instance-data', + ], + to: '/reference/v4/studio/overview', + }, + + // ── Logging ─────────────────────────────────────────────────────────────── + { + from: [ + '/docs/4.1/administration/logging', + '/docs/4.2/administration/logging', + '/docs/4.3/administration/logging', + '/docs/4.4/administration/logging', + '/docs/4.5/administration/logging', + '/docs/4.6/administration/logging', + '/docs/4.1/administration/logging/audit-logging', + '/docs/4.2/administration/logging/audit-logging', + '/docs/4.3/administration/logging/audit-logging', + '/docs/4.4/administration/logging/audit-logging', + '/docs/4.5/administration/logging/audit-logging', + '/docs/4.6/administration/logging/audit-logging', + '/docs/4.1/administration/logging/logging', + '/docs/4.2/administration/logging/logging', + '/docs/4.3/administration/logging/logging', + '/docs/4.4/administration/logging/logging', + '/docs/4.1/administration/logging/standard-logging', + '/docs/4.2/administration/logging/standard-logging', + '/docs/4.4/administration/logging/standard-logging', + '/docs/4.5/administration/logging/standard-logging', + '/docs/4.6/administration/logging/standard-logging', + '/docs/4.1/administration/logging/transaction-logging', + '/docs/4.2/administration/logging/transaction-logging', + '/docs/4.3/administration/logging/transaction-logging', + '/docs/4.4/administration/logging/transaction-logging', + '/docs/4.5/administration/logging/transaction-logging', + '/docs/4.6/administration/logging/transaction-logging', + '/docs/4.1/logging', + '/docs/4.1/audit-logging', + 
'/docs/4.1/transaction-logging', + ], + to: '/reference/v4/logging/overview', + }, + + // ── Administration (misc) ───────────────────────────────────────────────── + { + from: [ + '/docs/4.1/administration', + '/docs/4.2/administration', + '/docs/4.3/administration', + '/docs/4.4/administration', + '/docs/4.5/administration', + '/docs/4.6/administration', + '/docs/4.1/administration/administration', + '/docs/4.2/administration/administration', + '/docs/4.3/administration/administration', + '/docs/4.4/administration/administration', + '/docs/4.5/administration/administration', + ], + to: '/reference/v4', + }, + { + from: [ + '/docs/4.1/administration/cloning', + '/docs/4.2/administration/cloning', + '/docs/4.3/administration/cloning', + '/docs/4.4/administration/cloning', + '/docs/4.5/administration/cloning', + '/docs/4.6/administration/cloning', + ], + to: '/reference/v4/replication/overview', + }, + { + from: [ + '/docs/4.3/administration/compact', + '/docs/4.4/administration/compact', + '/docs/4.5/administration/compact', + '/docs/4.6/administration/compact', + ], + to: '/reference/v4/database/compaction', + }, + { + from: [ + '/docs/4.1/administration/jobs', + '/docs/4.2/administration/jobs', + '/docs/4.3/administration/jobs', + '/docs/4.4/administration/jobs', + '/docs/4.5/administration/jobs', + '/docs/4.1/jobs', + ], + to: '/reference/v4/database/jobs', + }, + + // ── Custom Functions (legacy) ───────────────────────────────────────────── + { + from: [ + '/docs/4.1/custom-functions', + '/docs/4.1/custom-functions/create-project', + '/docs/4.1/custom-functions/custom-functions-operations', + '/docs/4.1/custom-functions/debugging-custom-function', + '/docs/4.1/custom-functions/define-helpers', + '/docs/4.1/custom-functions/define-routes', + '/docs/4.1/custom-functions/example-projects', + '/docs/4.1/custom-functions/host-static', + '/docs/4.1/custom-functions/requirements-definitions', + '/docs/4.1/custom-functions/restarting-server', + 
'/docs/4.1/custom-functions/templates', + '/docs/4.1/custom-functions/using-npm-git', + ], + to: '/reference/v4/legacy/custom-functions', + }, + + // ── Old /docs/4.X/reference/* ───────────────────────────────────────────── + { + from: [ + '/docs/4.1/reference', + '/docs/4.2/reference', + '/docs/4.3/reference', + '/docs/4.4/reference', + '/docs/4.5/reference', + '/docs/4.6/reference', + ], + to: '/reference/v4', + }, + { + from: [ + '/docs/4.1/reference/globals', + '/docs/4.2/reference/globals', + '/docs/4.3/reference/globals', + '/docs/4.4/reference/globals', + '/docs/4.5/reference/globals', + '/docs/4.6/reference/globals', + ], + to: '/reference/v4/components/javascript-environment', + }, + { + from: [ + '/docs/4.1/reference/content-types', + '/docs/4.2/reference/content-types', + '/docs/4.3/reference/content-types', + '/docs/4.4/reference/content-types', + '/docs/4.5/reference/content-types', + '/docs/4.6/reference/content-types', + ], + to: '/reference/v4/rest/content-types', + }, + { + from: [ + '/docs/4.1/reference/headers', + '/docs/4.2/reference/headers', + '/docs/4.5/reference/headers', + '/docs/4.6/reference/headers', + ], + to: '/reference/v4/rest/headers', + }, + { + from: [ + '/docs/4.1/reference/data-types', + '/docs/4.2/reference/data-types', + '/docs/4.3/reference/data-types', + '/docs/4.4/reference/data-types', // note: reference/content-types also in 4.4 + '/docs/4.5/reference/data-types', + '/docs/4.6/reference/data-types', + '/docs/4.1/reference/dynamic-schema', + '/docs/4.2/reference/dynamic-schema', + '/docs/4.3/reference/dynamic-schema', + '/docs/4.4/reference/dynamic-schema', + '/docs/4.5/reference/dynamic-schema', + '/docs/4.6/reference/dynamic-schema', + '/docs/4.1/reference/limits', + '/docs/4.2/reference/limits', + '/docs/4.3/reference/limits', + '/docs/4.4/reference/limits', + '/docs/4.5/reference/limits', + '/docs/4.6/reference/limits', + ], + to: '/reference/v4/database/schema', + }, + { + from: ['/docs/4.5/reference/blob', 
'/docs/4.6/reference/blob'], + to: '/reference/v4/database/schema', + }, + { + from: [ + '/docs/4.5/reference/transactions', + '/docs/4.6/reference/transactions', + '/docs/4.2/reference/transactions', + '/docs/4.3/reference/transactions', + ], + to: '/reference/v4/database/transaction', + }, + { + from: ['/docs/4.5/reference/graphql', '/docs/4.6/reference/graphql'], + to: '/reference/v4/graphql-querying/overview', + }, + { + from: [ + '/docs/4.1/reference/storage-algorithm', + '/docs/4.2/reference/storage-algorithm', + '/docs/4.3/reference/storage-algorithm', + '/docs/4.4/reference/storage-algorithm', + '/docs/4.5/reference/storage-algorithm', + '/docs/4.6/reference/storage-algorithm', + ], + to: '/reference/v4/database/storage-algorithm', + }, + { + from: [ + '/docs/4.2/reference/analytics', + '/docs/4.3/reference/analytics', + '/docs/4.4/reference/analytics', + '/docs/4.5/reference/analytics', + '/docs/4.6/reference/analytics', + ], + to: '/reference/v4/analytics/overview', + }, + { + from: [ + '/docs/4.2/reference/architecture', + '/docs/4.3/reference/architecture', + '/docs/4.4/reference/architecture', + '/docs/4.5/reference/architecture', + '/docs/4.6/reference/architecture', + ], + to: '/reference/v4', + }, + { + from: [ + '/docs/4.2/reference/resource', + '/docs/4.3/reference/resource', + '/docs/4.4/reference/resource', + '/docs/4.5/reference/resource', + '/docs/4.6/reference/resource', + ], + to: '/reference/v4/resources/overview', + }, + { + from: [ + '/docs/4.5/reference/resources', + '/docs/4.6/reference/resources', + '/docs/4.5/reference/query-optimization', + '/docs/4.6/reference/query-optimization', + '/docs/4.4/reference/query-optimization', + ], + to: '/reference/v4/resources/overview', + }, + { + from: ['/docs/4.6/reference/resources/instance-binding', '/docs/4.6/reference/resources/migration'], + to: '/reference/v4/resources/resource-api', + }, + { + from: ['/docs/4.6/reference/resources/query-optimization'], + to: 
'/reference/v4/resources/query-optimization', + }, + { + from: ['/docs/4.4/reference/roles', '/docs/4.5/reference/roles', '/docs/4.6/reference/roles'], + to: '/reference/v4/users-and-roles/overview', + }, + { + from: [ + '/docs/4.2/reference/clustering', + '/docs/4.3/reference/clustering', + '/docs/4.4/reference/clustering', + '/docs/4.5/reference/clustering/certificate-management', + '/docs/4.5/reference/clustering/enabling-clustering', + '/docs/4.5/reference/clustering/naming-a-node', + '/docs/4.5/reference/clustering/requirements-and-definitions', + '/docs/4.5/reference/clustering/things-worth-knowing', + '/docs/4.6/reference/clustering', + '/docs/4.6/reference/clustering/certificate-management', + '/docs/4.6/reference/clustering/subscription-overview', + '/docs/4.6/reference/clustering/things-worth-knowing', + '/docs/4.2/reference/clustering/creating-a-cluster-user', + '/docs/4.2/reference/clustering/naming-a-node', + '/docs/4.2/reference/clustering/subscription-overview', + '/docs/4.2/reference/clustering/things-worth-knowing', + '/docs/4.3/reference/clustering/enabling-clustering', + '/docs/4.3/reference/clustering/managing-subscriptions', + '/docs/4.3/reference/clustering/naming-a-node', + '/docs/4.4/reference/clustering/creating-a-cluster-user', + '/docs/4.4/reference/clustering/enabling-clustering', + '/docs/4.4/reference/clustering/naming-a-node', + '/docs/4.4/reference/clustering/requirements-and-definitions', + '/docs/4.4/reference/clustering/things-worth-knowing', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: [ + '/docs/4.6/reference/components', + '/docs/4.6/reference/components/applications', + '/docs/4.6/reference/components/built-in-extensions', + '/docs/4.6/reference/components/configuration', + '/docs/4.6/reference/components/extensions', + '/docs/4.6/reference/components/plugins', + ], + to: '/reference/v4/components/overview', + }, + + // ── Old /docs/4.X/technical-details/reference/* ─────────────────────────── + { + 
from: [ + '/docs/4.1/technical-details/reference/analytics', + '/docs/4.2/technical-details/reference/analytics', + '/docs/4.3/technical-details/reference/analytics', + '/docs/4.4/technical-details/reference/analytics', + '/docs/4.5/technical-details/reference/analytics', + '/docs/4.6/technical-details/reference/analytics', + ], + to: '/reference/v4/analytics/overview', + }, + { + from: [ + '/docs/4.1/technical-details/reference/architecture', + '/docs/4.2/technical-details/reference/architecture', + '/docs/4.3/technical-details/reference/architecture', + '/docs/4.4/technical-details/reference/architecture', + '/docs/4.5/technical-details/reference/architecture', + '/docs/4.6/technical-details/reference/architecture', + ], + to: '/reference/v4', + }, + { + from: [ + '/docs/4.1/technical-details/reference/globals', + '/docs/4.2/technical-details/reference/globals', + '/docs/4.3/technical-details/reference/globals', + '/docs/4.4/technical-details/reference/globals', + ], + to: '/reference/v4/components/javascript-environment', + }, + { + from: [ + '/docs/4.1/technical-details/reference/data-types', + '/docs/4.2/technical-details/reference/content-types', + '/docs/4.3/technical-details/reference/content-types', + '/docs/4.4/technical-details/reference/content-types', + '/docs/4.5/technical-details/reference/data-types', + '/docs/4.6/technical-details/reference/content-types', + '/docs/4.6/technical-details/reference/data-types', + '/docs/4.6/technical-details/reference/dynamic-schema', + '/docs/4.3/technical-details/reference/data-types', + '/docs/4.4/technical-details/reference/data-types', + '/docs/4.3/technical-details/reference/dynamic-schema', + '/docs/4.4/technical-details/reference/dynamic-schema', + '/docs/4.2/technical-details/reference/limits', + '/docs/4.3/technical-details/reference/limits', + '/docs/4.4/technical-details/reference/limits', + '/docs/4.5/technical-details/reference/blob', + ], + to: '/reference/v4/database/schema', + }, + { + from: [ + 
'/docs/4.1/technical-details/reference/storage-algorithm', + '/docs/4.2/technical-details/reference/storage-algorithm', + '/docs/4.3/technical-details/reference/storage-algorithm', + '/docs/4.4/technical-details/reference/storage-algorithm', + '/docs/4.5/technical-details/reference/storage-algorithm', + '/docs/4.6/technical-details/reference/storage-algorithm', + ], + to: '/reference/v4/database/storage-algorithm', + }, + { + from: [ + '/docs/4.1/technical-details/reference/graphql', + '/docs/4.2/technical-details/reference/graphql', + '/docs/4.3/technical-details/reference/graphql', // inferred from pattern + '/docs/4.5/technical-details/reference/graphql', + '/docs/4.6/technical-details/reference/graphql', // inferred + ], + to: '/reference/v4/graphql-querying/overview', + }, + { + from: [ + '/docs/4.1/technical-details/reference/transactions', + '/docs/4.2/technical-details/reference/transactions', + '/docs/4.3/technical-details/reference/transactions', + '/docs/4.4/technical-details/reference/transactions', + '/docs/4.5/technical-details/reference/transactions', + '/docs/4.6/technical-details/reference/transactions', + ], + to: '/reference/v4/database/transaction', + }, + { + from: [ + '/docs/4.1/technical-details/reference/resource', + '/docs/4.2/technical-details/reference/resource', + '/docs/4.3/technical-details/reference/resource', + '/docs/4.4/technical-details/reference/resource', + '/docs/4.6/technical-details/reference/resource', + '/docs/4.6/technical-details/reference/resources', + '/docs/4.6/technical-details/reference/resources/instance-binding', + '/docs/4.6/technical-details/reference/resources/migration', + ], + to: '/reference/v4/resources/overview', + }, + { + from: [ + '/docs/4.6/technical-details/reference/components', + '/docs/4.6/technical-details/reference/components/applications', + '/docs/4.6/technical-details/reference/components/built-in-extensions', + '/docs/4.6/technical-details/reference/components/configuration', + 
'/docs/4.6/technical-details/reference/components/plugins', + ], + to: '/reference/v4/components/overview', + }, + { + from: [ + '/docs/4.2/technical-details/reference', + '/docs/4.3/technical-details/reference', + '/docs/4.4/technical-details/reference', + '/docs/4.5/technical-details/reference', + '/docs/4.6/technical-details/reference', + '/docs/4.6/technical-details/', + '/docs/4.3/technical-details/', + '/docs/4.4/technical-details/', + '/docs/4.2/technical-details/', + ], + to: '/reference/v4', + }, + + // ── Old /docs/4.X/add-ons-and-sdks/* ───────────────────────────────────── + { + from: ['/docs/4.1/add-ons-and-sdks', '/docs/4.1/add-ons-and-sdks/google-data-studio'], + to: '/reference/v4/components/overview', + }, + + // ── Support ─────────────────────────────────────────────────────────────── + { from: ['/docs/4.1/support'], to: '/reference/v4' }, + + // ── Release notes (versioned) ───────────────────────────────────────────── + // Paths seen in analytics under /docs/4.X/release-notes/* and + // /docs/4.X/technical-details/release-notes/* + // Old format: /docs/4.X/release-notes/{codename}/{semver} + // New format: /release-notes/{codename}/{semver} + { from: ['/docs/4.1/release-notes', '/docs/4.2/release-notes', '/docs/4.3/release-notes'], to: '/release-notes' }, + + // 1.alby series + { + from: [ + '/docs/4.1/release-notes/1.alby/1.1.0', + '/docs/4.1/technical-details/release-notes/4.tucker/1.2.0', + '/docs/4.3/technical-details/release-notes/4.tucker/1.1.0', + '/docs/4.3/technical-details/release-notes/4.tucker/1.2.0', + '/docs/4.3/technical-details/release-notes/4.tucker/1.3.0', + '/docs/4.4/technical-details/release-notes/4.tucker/1.2.0', + '/docs/4.4/technical-details/release-notes/4.tucker/1.3.0', + '/docs/4.2/technical-details/release-notes/4.tucker/1.2.0', + '/docs/4.2/technical-details/release-notes/4.tucker/1.3.0', + ], + to: '/release-notes', + }, + { + from: ['/docs/4.1/release-notes/1.alby/1.2.0', '/docs/4.1/release-notes/1.alby/1.3.0'], + 
to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/1.alby/1.3.1', + '/docs/4.3/technical-details/release-notes/4.tucker/1.3.1', + '/docs/4.4/technical-details/release-notes/4.tucker/1.alby', + '/docs/4.3/technical-details/release-notes/4.tucker/1.alby', + '/docs/4.5/technical-details/release-notes/4.tucker/1.alby', + '/docs/4.2/technical-details/release-notes/4.tucker/1.3.1', + ], + to: '/release-notes/v1-alby/1.3.1', + }, + + // 2.penny series + { + from: [ + '/docs/4.1/release-notes/2.penny/2.1.1', + '/docs/4.3/technical-details/release-notes/4.tucker/2.1.1', + '/docs/4.5/technical-details/release-notes/4.tucker/2.1.1', + ], + to: '/release-notes/v2-penny/2.1.1', + }, + { + from: [ + '/docs/4.1/release-notes/2.penny/2.2.0', + '/docs/4.3/technical-details/release-notes/4.tucker/2.2.0', + '/docs/4.4/technical-details/release-notes/4.tucker/2.2.0', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/2.penny/2.2.2', + '/docs/4.1/technical-details/release-notes/4.tucker/2.penny/2.2.2', + '/docs/4.3/technical-details/release-notes/4.tucker/2.2.2', + '/docs/4.3/technical-details/release-notes/4.tucker/2.penny/2.2.2', + '/docs/4.4/technical-details/release-notes/4.tucker/2.2.2', + '/docs/4.5/technical-details/release-notes/4.tucker/2.2.2', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/2.penny/2.2.3', + '/docs/4.2/release-notes/2.penny/2.2.3', + '/docs/4.3/release-notes/2.penny/2.2.3', + '/docs/4.3/technical-details/release-notes/4.tucker/2.2.3', + ], + to: '/release-notes', + }, + { + from: ['/docs/4.1/release-notes/2.penny/2.3.0', '/docs/4.3/technical-details/release-notes/4.tucker/2.3.0'], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/2.penny/2.3.1', + '/docs/4.2/release-notes/2.penny/2.3.1', + '/docs/4.3/release-notes/2.penny/2.3.1', + '/docs/4.3/technical-details/release-notes/4.tucker/2.3.1', + '/docs/4.4/technical-details/release-notes/4.tucker/2.3.1', + ], + to: 
'/release-notes', + }, + { + from: [ + '/docs/4.3/technical-details/release-notes/4.tucker/2.penny', + '/docs/4.4/technical-details/release-notes/4.tucker/2.penny', + '/docs/4.5/technical-details/release-notes/4.tucker/2.penny', + ], + to: '/release-notes', + }, + + // 3.monkey series + { + from: [ + '/docs/4.1/release-notes/3.monkey', + '/docs/4.2/release-notes/3.monkey', + '/docs/4.3/release-notes/3.monkey/3.3.0', + '/docs/4.3/technical-details/release-notes/4.tucker/3.monkey', + '/docs/4.4/technical-details/release-notes/4.tucker/3.monkey', + '/docs/4.5/technical-details/release-notes/4.tucker/3.monkey', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/3.monkey/3.0.0', + '/docs/4.3/technical-details/release-notes/4.tucker/3.0.0', + '/docs/4.4/technical-details/release-notes/4.tucker/3.0.0', + ], + to: '/release-notes/v3-monkey/3.0.0', + }, + { + from: [ + '/docs/4.3/technical-details/release-notes/4.tucker/3.1.0', + '/docs/4.4/technical-details/release-notes/4.tucker/3.1.0', + '/docs/4.3/technical-details/release-notes/4.tucker/3.1.1', + '/docs/4.4/technical-details/release-notes/4.tucker/3.1.1', + '/docs/4.3/technical-details/release-notes/4.tucker/3.1.2', + '/docs/4.1/technical-details/release-notes/4.tucker/3.1.2', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/3.monkey/3.1.3', + '/docs/4.3/technical-details/release-notes/4.tucker/3.1.3', + '/docs/4.4/technical-details/release-notes/4.tucker/3.1.3', + ], + to: '/release-notes/v3-monkey/3.1.3', + }, + { + from: [ + '/docs/4.1/release-notes/3.monkey/3.1.4', + '/docs/4.4/technical-details/release-notes/4.tucker/3.1.4', + '/docs/4.5/technical-details/release-notes/4.tucker/3.1.5', + '/docs/4.3/technical-details/release-notes/4.tucker/3.1.5', + '/docs/4.4/technical-details/release-notes/4.tucker/3.1.5', + ], + to: '/release-notes', + }, + { + from: ['/docs/4.1/release-notes/3.monkey/3.1.5'], + to: '/release-notes/v3-monkey/3.1.5', + }, + { + from: [ + 
'/docs/4.3/technical-details/release-notes/4.tucker/3.2.0', + '/docs/4.4/technical-details/release-notes/4.tucker/3.2.1', + ], + to: '/release-notes', + }, + { + from: ['/docs/4.1/release-notes/3.monkey/3.2.1', '/docs/4.1/technical-details/release-notes/4.tucker/3.2.1'], + to: '/release-notes/v3-monkey/3.2.1', + }, + { + from: [ + '/docs/4.1/release-notes/3.monkey/3.3.0', + '/docs/4.2/release-notes/3.monkey/3.3.0', + '/docs/4.3/technical-details/release-notes/4.tucker/3.3.0', + '/docs/4.4/technical-details/release-notes/4.tucker/3.3.0', + '/docs/4.5/technical-details/release-notes/4.tucker/3.3.0', + ], + to: '/release-notes/v3-monkey/3.3.0', + }, + + // 4.tucker series + { + from: ['/docs/4.1/release-notes/4.tucker'], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/4.tucker/4.0.0', + '/docs/4.1/release-notes/4.tucker/4.0.2', + '/docs/4.1/release-notes/4.tucker/4.0.3', + '/docs/4.1/release-notes/4.tucker/4.0.4', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/4.tucker/4.0.5', + '/docs/4.2/release-notes/4.tucker/4.0.5', + '/docs/4.3/release-notes/4.tucker/4.0.5', + ], + to: '/release-notes', + }, + { + from: [ + '/docs/4.1/release-notes/4.tucker/4.0.6', + '/docs/4.2/release-notes/4.tucker/4.0.6', + '/docs/4.3/release-notes/4.tucker/4.0.6', + ], + to: '/release-notes', + }, + { + from: ['/docs/4.1/release-notes/4.tucker/4.1.0'], + to: '/release-notes', + }, + { + from: [ + '/docs/4.4/technical-details/release-notes/4.tucker/4.4.25', + '/docs/4.4/technical-details/release-notes/4.tucker/4.4.26', + ], + to: '/release-notes', + }, + + // ── Administration / Studio (harperdb-studio under /docs/4.X/administration) ── + { + from: [ + '/docs/4.1/administration/harperdb-studio', + '/docs/4.2/administration/harperdb-studio', + '/docs/4.3/administration/harperdb-studio', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/create-account', + 
'/docs/4.2/administration/harperdb-studio/create-account', + '/docs/4.3/administration/harperdb-studio/create-account', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/instances', + '/docs/4.2/administration/harperdb-studio/instances', + '/docs/4.3/administration/harperdb-studio/instances', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/login-password-reset', + '/docs/4.3/administration/harperdb-studio/login-password-reset', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/instance-configuration', + '/docs/4.3/administration/harperdb-studio/instance-configuration', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/instance-metrics', + '/docs/4.3/administration/harperdb-studio/instance-metrics', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/instance-example-code', + '/docs/4.3/administration/harperdb-studio/instance-example-code', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/manage-schemas-browse-data', + '/docs/4.1/administration/harperdb-studio/manage-schemas-browse-data', + '/docs/4.3/administration/harperdb-studio/manage-databases-browse-data', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/manage-instance-users', + '/docs/4.2/administration/harperdb-studio/manage-instance-users', + '/docs/4.3/administration/harperdb-studio/manage-instance-users', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/manage-instance-roles', + '/docs/4.3/administration/harperdb-studio/manage-instance-roles', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + 
'/docs/4.2/administration/harperdb-studio/manage-functions', + '/docs/4.3/administration/harperdb-studio/manage-functions', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/manage-charts', + '/docs/4.3/administration/harperdb-studio/manage-charts', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/manage-clustering', + '/docs/4.3/administration/harperdb-studio/manage-clustering', + ], + to: '/reference/v4/studio/overview', + }, + { from: ['/docs/4.3/administration/harperdb-studio/manage-replication'], to: '/reference/v4/studio/overview' }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/manage-applications', + '/docs/4.3/administration/harperdb-studio/manage-applications', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/enable-mixed-content', + '/docs/4.2/administration/harperdb-studio/enable-mixed-content', + '/docs/4.3/administration/harperdb-studio/enable-mixed-content', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.1/administration/harperdb-studio/organizations', + '/docs/4.2/administration/harperdb-studio/organizations', + '/docs/4.3/administration/harperdb-studio/organizations', + ], + to: '/reference/v4/studio/overview', + }, + { + from: [ + '/docs/4.2/administration/harperdb-studio/query-instance-data', + '/docs/4.3/administration/harperdb-studio/query-instance-data', + ], + to: '/reference/v4/studio/overview', + }, + + // ── Clustering (4.1 / 4.2 paths not already covered above) ─────────────── + { + from: [ + '/docs/4.2/developers/clustering', + '/docs/4.2/developers/clustering/things-worth-knowing', + '/docs/4.2/developers/clustering/establishing-routes', + '/docs/4.2/developers/clustering/managing-subscriptions', + '/docs/4.2/developers/clustering/subscription-overview', + '/docs/4.2/developers/clustering/requirements-and-definitions', + 
'/docs/4.2/developers/clustering/naming-a-node', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: [ + '/docs/4.1/developers/clustering/naming-a-node', + '/docs/4.1/developers/clustering/requirements-and-definitions', + '/docs/4.1/developers/clustering/subscription-overview', + '/docs/4.1/developers/clustering/things-worth-knowing', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: [ + '/docs/4.1/developers/replication/clustering/creating-a-cluster-user', + '/docs/4.1/developers/replication/clustering/managing-subscriptions', + '/docs/4.1/developers/replication/clustering/naming-a-node', + '/docs/4.3/developers/replication/clustering/establishing-routes', + ], + to: '/reference/v4/replication/clustering', + }, + { + from: ['/docs/4.2/developers/operations-api/clustering', '/docs/4.3/developers/operations-api/clustering'], + to: '/reference/v4/replication/clustering', + }, + + // ── Technical details / Reference (additional paths) ────────────────────── + { + from: [ + '/docs/4.2/technical-details/reference/headers', + '/docs/4.3/technical-details/reference/headers', + '/docs/4.4/technical-details/reference/headers', + '/docs/4.5/technical-details/reference/headers', + ], + to: '/reference/v4/rest/headers', + }, + { from: ['/docs/4.3/reference/headers'], to: '/reference/v4/rest/headers' }, + + // ── Deployments / Cloud (4.1 paths) ─────────────────────────────────────── + { + from: [ + '/docs/4.1/deployments/harperdb-cloud', + '/docs/4.1/deployments/harperdb-cloud/instance-size-hardware-specs', + '/docs/4.1/deployments/harperdb-cloud/iops-impact', + '/docs/4.1/deployments/harperdb-cloud/verizon-5g-wavelength-instances', + ], + to: '/reference/v4/legacy/cloud', + }, + + // ── SQL (additional 4.2 path) ────────────────────────────────────────────── + { from: ['/docs/4.2/developers/sql-guide/reserved-word'], to: '/reference/v4/database/sql' }, + + // ── Release notes (additional versioned paths) ──────────────────────────── + { + 
from: ['/docs/4.1/release-notes/1.alby', '/docs/4.3/technical-details/release-notes/4.tucker/3.2.1'], + to: '/release-notes', + }, +]; diff --git a/memory/part5-redirects.md b/memory/part5-redirects.md index 8eeb27d6..70f63a01 100644 --- a/memory/part5-redirects.md +++ b/memory/part5-redirects.md @@ -13,28 +13,28 @@ The redirect plugin is currently commented out in `docusaurus.config.ts` (lines New reference paths live at `/reference/v4/[section]/[page]`: -| Section | Key Pages | -|----------------------|---------------------------------------------------------------------------| -| analytics | overview, operations | -| cli | overview, commands, authentication, operations-api-commands | -| components | overview, applications, extension-api, javascript-environment, plugin-api | -| configuration | overview, options, operations | -| database | overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql | -| environment-variables| overview | -| fastify-routes | overview | -| graphql-querying | overview | -| http | overview, configuration, api, tls | -| legacy | cloud, custom-functions | -| logging | overview, configuration, api, operations | -| mqtt | overview, configuration | -| operations-api | overview, operations | -| replication | overview, clustering, sharding | -| resources | overview, resource-api, query-optimization | -| rest | overview, querying, headers, content-types, websockets, server-sent-events| -| security | overview, basic-authentication, jwt-authentication, mtls-authentication, certificate-management, certificate-verification, configuration, api | -| static-files | overview | -| studio | overview | -| users-and-roles | overview, configuration, operations | +| Section | Key Pages | +| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| analytics | overview, operations | +| cli | overview, 
commands, authentication, operations-api-commands | +| components | overview, applications, extension-api, javascript-environment, plugin-api | +| configuration | overview, options, operations | +| database | overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql | +| environment-variables | overview | +| fastify-routes | overview | +| graphql-querying | overview | +| http | overview, configuration, api, tls | +| legacy | cloud, custom-functions | +| logging | overview, configuration, api, operations | +| mqtt | overview, configuration | +| operations-api | overview, operations | +| replication | overview, clustering, sharding | +| resources | overview, resource-api, query-optimization | +| rest | overview, querying, headers, content-types, websockets, server-sent-events | +| security | overview, basic-authentication, jwt-authentication, mtls-authentication, certificate-management, certificate-verification, configuration, api | +| static-files | overview | +| studio | overview | +| users-and-roles | overview, configuration, operations | ## Old Path Structure (v4.7) @@ -58,7 +58,7 @@ The old docs were at `/docs/` serving the latest (4.7) content: - `/docs/administration/cloning` → Replication - `/docs/administration/compact` → Database/compaction - `/docs/administration/jobs` → Database/jobs -- `/docs/reference/*` → Old reference section (reference/analytics, reference/resources/*, etc.) +- `/docs/reference/*` → Old reference section (reference/analytics, reference/resources/\*, etc.) 
- `/docs/foundations/*` → learn/ (already handled) - `/docs/getting-started/*` → learn/ (already handled) @@ -67,6 +67,7 @@ The old docs were at `/docs/` serving the latest (4.7) content: Paths from GA data (Oct 2025 – Feb 2026) that need redirects to `/reference/v4/`: ### High Priority (>200 views) + - `/docs/developers/operations-api` (1028) → `/reference/v4/operations-api/overview` - `/docs/developers/applications` (727) → `/reference/v4/components/overview` - `/docs/reference/resources` (667) → `/reference/v4/resources/overview` @@ -86,6 +87,7 @@ Paths from GA data (Oct 2025 – Feb 2026) that need redirects to `/reference/v4 - `/docs/developers/operations-api/bulk-operations` (158) → `/reference/v4/operations-api/operations` ### Medium Priority (50–200 views) + - `/docs/developers/applications/data-loader` (218) → `/reference/v4/database/data-loader` - `/docs/developers/operations-api/system-operations` (213) → `/reference/v4/operations-api/operations` - `/docs/reference/components/built-in-extensions` (204) → `/reference/v4/components/extension-api` @@ -168,6 +170,7 @@ Paths from GA data (Oct 2025 – Feb 2026) that need redirects to `/reference/v4 ## Versioned Doc Paths (/docs/4.X/) in Analytics Low traffic but some exist. Recommend a general catch-all pattern: + - `/docs/4.X/developers/...` → strip version prefix, apply same rules as `/docs/developers/...` - `/docs/4.X/reference/...` → strip version prefix, apply same rules as `/docs/reference/...` - Alternative: redirect `/docs/4.X/...` → `/docs/...` (simpler, single hop) @@ -175,19 +178,23 @@ Low traffic but some exist. 
Recommend a general catch-all pattern: ## Special Notes for Non-Reference Sections ### /learn/ — needs few/no new redirects + - Already has redirects for `/getting-started/*` and `/foundations/*` - `/learn/developers/coming-soon` and `/learn/administration/coming-soon` are real pages, no redirects needed ### /release-notes/ — existing redirects are fine + - The existing `createRedirects` logic for release-notes path variants (old naming) is worth keeping - No new redirects needed unless we change the release-notes structure ### /fabric/ — no redirects needed + - Brand new section with no old paths to redirect from ## Old redirects.ts Issues The existing file has: + 1. Many rules dragged from very old docs (HarperDB Studio → Harper Studio, HarperDB Cloud, custom-functions etc.) that are still valid but very old 2. `withBase()` abstraction that adds complexity — the basePath was used when docs were at `/docs/` but now everything is at root 3. Separate `generateRedirects()` and `createRedirects()` (wildcard) functions — the split is conceptually fine diff --git a/package.json b/package.json index 487704a9..755814ec 100644 --- a/package.json +++ b/package.json @@ -3,15 +3,15 @@ "version": "0.0.0", "private": true, "scripts": { - "prebuild": "node scripts/prebuild.js", "docusaurus": "docusaurus", - "start": "npm run prebuild && docusaurus start", - "dev": "npm run prebuild && docusaurus start", - "build": "npm run prebuild && docusaurus build && node scripts/postbuild.js", + "start": "docusaurus start", + "dev": "docusaurus start", + "prebuild": "node scripts/prebuild.js", + "build": "docusaurus build", "version": "docusaurus docs:version", "swizzle": "docusaurus swizzle", "deploy": "docusaurus deploy", - "clear": "npm run prebuild -- clean && docusaurus clear", + "clear": "docusaurus clear", "serve": "docusaurus serve", "typecheck": "tsc", "format": "prettier .", diff --git a/redirects.ts b/redirects.ts index 638f3fad..5b387cf8 100644 --- a/redirects.ts +++ 
b/redirects.ts @@ -1,235 +1,372 @@ // Redirect configuration for Docusaurus client-side redirects -// Based on pageview analytics (Oct 2025 – Feb 2026) from docs.harper.fast +// +// This file contains redirects for non-versioned /docs/* paths (i.e. paths +// without a /docs/4.X/ prefix). These represent traffic from the era when there +// was a single "current" version, so they redirect to the current reference +// (/reference/v5/). +// +// For versioned /docs/4.X/* paths, see historic-redirects.ts. // // Sections with NO redirects needed: // /reference/ — new section, no old paths point here -// /learn/ — already has redirects for /getting-started/* and /foundations/* -// /release-notes/ — existing createRedirects logic handles old naming variants +// /learn/ — existing redirects for /getting-started/* and /foundations/* +// /release-notes/ — static rules below cover all paths seen in analytics // /fabric/ — new section, no old paths +import { historicRedirects } from './historic-redirects'; + type RedirectRule = { to: string; from: string | string[]; }; -// ─── Static redirect rules ─────────────────────────────────────────────────── +// ─── Static redirect rules ──────────────────────────────────────────────────── // All paths sourced from GA pageview data (Oct 2025–Feb 2026). -// Paths with <10 views are marked LOW TRAFFIC and may be dropped in a future cleanup. +// Non-versioned /docs/* paths → /reference/v5/ (current version) +// Paths with <10 views are marked LOW TRAFFIC. -export const redirects: RedirectRule[] = [ +const currentRedirects: RedirectRule[] = [ // ── Docs root ────────────────────────────────────────────────────────────── - // Note: /docs and /docs/ cannot be redirected here because Docusaurus builds - // a real page at that path (docs/index.mdx). The index.mdx itself should - // handle sending users to the right place. 
- { from: '/docs/', to: '/' }, + { from: '/docs', to: '/' }, + { from: '/docs/category/developers', to: '/' }, // ── Getting Started / Foundations → Learn ───────────────────────────────── { from: '/docs/getting-started', to: '/learn' }, { from: '/docs/getting-started/quickstart', to: '/learn' }, { from: '/docs/getting-started/installation', to: '/learn/getting-started/install-and-connect-harper' }, { from: '/docs/getting-started/install-harper', to: '/learn/getting-started/install-and-connect-harper' }, - // LOW TRAFFIC (<10 views): { from: '/docs/getting-started/what-is-harper', to: '/learn' }, { from: '/docs/getting-started/harper-concepts', to: '/learn' }, + { from: '/docs/getting-started/first-harper-app', to: '/learn' }, { from: '/docs/foundations/harper-architecture', to: '/learn' }, { from: '/docs/foundations/core-concepts', to: '/learn' }, { from: '/docs/foundations/use-cases', to: '/learn' }, // ── Operations API ───────────────────────────────────────────────────────── - { from: '/docs/developers/operations-api', to: '/reference/v4/operations-api/overview' }, - { from: '/docs/developers/operations-api/nosql-operations', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/databases-and-tables', to: '/reference/v4/database/overview' }, - { from: '/docs/developers/operations-api/components', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/advanced-json-sql-examples', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/bulk-operations', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/system-operations', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/configuration', to: '/reference/v4/configuration/operations' }, - { from: '/docs/developers/operations-api/users-and-roles', to: '/reference/v4/users-and-roles/operations' }, - { from: 
'/docs/developers/operations-api/analytics', to: '/reference/v4/analytics/operations' }, - { from: '/docs/developers/operations-api/quickstart-examples', to: '/reference/v4/operations-api/operations' }, - { from: '/docs/developers/operations-api/certificate-management', to: '/reference/v4/security/certificate-management' }, + { from: '/docs/developers/operations-api', to: '/reference/v5/operations-api/overview' }, + { from: '/docs/developers/operations-api/nosql-operations', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/databases-and-tables', to: '/reference/v5/database/overview' }, + { from: '/docs/developers/operations-api/components', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/advanced-json-sql-examples', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/bulk-operations', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/system-operations', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/configuration', to: '/reference/v5/configuration/operations' }, + { from: '/docs/developers/operations-api/users-and-roles', to: '/reference/v5/users-and-roles/operations' }, + { from: '/docs/developers/operations-api/analytics', to: '/reference/v5/analytics/operations' }, + { from: '/docs/developers/operations-api/quickstart-examples', to: '/reference/v5/operations-api/operations' }, + { + from: '/docs/developers/operations-api/certificate-management', + to: '/reference/v5/security/certificate-management', + }, { from: '/docs/developers/operations-api/custom-functions', to: '/reference/v4/legacy/custom-functions' }, - { from: '/docs/developers/operations-api/jobs', to: '/reference/v4/database/jobs' }, - { from: '/docs/developers/operations-api/logs', to: '/reference/v4/logging/operations' }, - { from: '/docs/developers/operations-api/sql-operations', to: 
'/reference/v4/database/sql' }, - { from: '/docs/developers/operations-api/clustering-nats', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/operations-api/clustering', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/operations-api/token-authentication', to: '/reference/v4/security/jwt-authentication' }, - { from: '/docs/developers/operations-api/registration', to: '/reference/v4/operations-api/operations' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/developers/operations-api/utilities', to: '/reference/v4/operations-api/operations' }, + { from: '/docs/developers/operations-api/jobs', to: '/reference/v5/database/jobs' }, + { from: '/docs/developers/operations-api/logs', to: '/reference/v5/logging/operations' }, + { from: '/docs/developers/operations-api/sql-operations', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/operations-api/clustering-nats', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/operations-api/clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/operations-api/token-authentication', to: '/reference/v5/security/jwt-authentication' }, + { from: '/docs/developers/operations-api/registration', to: '/reference/v5/operations-api/operations' }, + { from: '/docs/developers/operations-api/utilities', to: '/reference/v5/operations-api/operations' }, // ── Applications / Components ────────────────────────────────────────────── - { from: '/docs/developers/applications', to: '/reference/v4/components/overview' }, - { from: '/docs/developers/applications/defining-schemas', to: '/reference/v4/database/schema' }, + { from: '/docs/developers/applications', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/applications/defining-schemas', to: '/reference/v5/database/schema' }, { // TODO: eventually redirect to a dedicated learn page for database caching from: '/docs/developers/applications/caching', - to: 
'/reference/v4/resources/overview', + to: '/reference/v5/resources/overview', }, - { from: '/docs/developers/applications/data-loader', to: '/reference/v4/database/data-loader' }, - { from: '/docs/developers/applications/web-applications', to: '/reference/v4/components/applications' }, - { from: '/docs/developers/applications/debugging', to: '/reference/v4/components/overview' }, - { from: '/docs/developers/applications/define-routes', to: '/reference/v4/fastify-routes/overview' }, - { from: '/docs/developers/applications/defining-roles', to: '/reference/v4/users-and-roles/overview' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/developers/applications/', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/applications/data-loader', to: '/reference/v5/database/data-loader' }, + { from: '/docs/developers/applications/web-applications', to: '/reference/v5/components/applications' }, + { from: '/docs/developers/applications/debugging', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/applications/define-routes', to: '/reference/v5/fastify-routes/overview' }, + { from: '/docs/developers/applications/defining-roles', to: '/reference/v5/users-and-roles/overview' }, // ── Old /developers/components/* (separate from /reference/components/*) ── - { from: '/docs/developers/components', to: '/reference/v4/components/overview' }, - { from: '/docs/developers/components/built-in', to: '/reference/v4/components/extension-api' }, - { from: '/docs/developers/components/reference', to: '/reference/v4/components/extension-api' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/developers/components/writing-extensions', to: '/reference/v4/components/extension-api' }, - { from: '/docs/developers/components/managing', to: '/reference/v4/components/overview' }, - { from: '/docs/developers/miscellaneous/sdks', to: '/reference/v4/components/overview' }, + { from: '/docs/developers/components', to: '/reference/v5/components/overview' }, + { from: 
'/docs/developers/components/built-in', to: '/reference/v5/components/extension-api' }, + { from: '/docs/developers/components/reference', to: '/reference/v5/components/extension-api' }, + { from: '/docs/developers/components/writing-extensions', to: '/reference/v5/components/extension-api' }, + { from: '/docs/developers/components/managing', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/components/sdks', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/components/drivers', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/components/operations', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/components/google-data-studio', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/miscellaneous', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/miscellaneous/sdks', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/miscellaneous/google-data-studio', to: '/reference/v5/components/overview' }, + { from: '/docs/add-ons-and-sdks/google-data-studio', to: '/reference/v5/components/overview' }, + { from: '/docs/add-ons-and-sdks', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/plugin-best-practices', to: '/reference/v5/components/plugin-api' }, + { from: '/docs/developers/plugins', to: '/reference/v5/components/plugin-api' }, + { from: '/docs/developers/pub-sub', to: '/reference/v5/rest/websockets' }, + { from: '/docs/developers/vector-indexes', to: '/reference/v5/database/overview' }, + { from: '/docs/developers/request-lifecycle', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/testing', to: '/reference/v5/components/overview' }, + { from: '/docs/developers/applications/example-projects', to: '/learn' }, + { from: '/docs/extensions/functions', to: '/reference/v4/legacy/custom-functions' }, + { from: '/docs/custom-functions/host-static', to: '/reference/v4/legacy/custom-functions' 
}, + { from: '/docs/custom-functions/restarting-server', to: '/reference/v4/legacy/custom-functions' }, // ── Security ─────────────────────────────────────────────────────────────── - { from: '/docs/developers/security', to: '/reference/v4/security/overview' }, - { from: '/docs/developers/security/configuration', to: '/reference/v4/security/configuration' }, - { from: '/docs/developers/security/users-and-roles', to: '/reference/v4/users-and-roles/overview' }, - { from: '/docs/developers/security/jwt-auth', to: '/reference/v4/security/jwt-authentication' }, - { from: '/docs/developers/security/basic-auth', to: '/reference/v4/security/basic-authentication' }, - { from: '/docs/developers/security/certificate-management', to: '/reference/v4/security/certificate-management' }, - { from: '/docs/developers/security/certificate-verification', to: '/reference/v4/security/certificate-verification' }, - { from: '/docs/developers/security/mtls-auth', to: '/reference/v4/security/mtls-authentication' }, + { from: '/docs/developers/security', to: '/reference/v5/security/overview' }, + { from: '/docs/developers/security/configuration', to: '/reference/v5/security/configuration' }, + { from: '/docs/developers/security/users-and-roles', to: '/reference/v5/users-and-roles/overview' }, + { from: '/docs/developers/security/jwt-auth', to: '/reference/v5/security/jwt-authentication' }, + { from: '/docs/developers/security/basic-auth', to: '/reference/v5/security/basic-authentication' }, + { from: '/docs/developers/security/certificate-management', to: '/reference/v5/security/certificate-management' }, + { from: '/docs/developers/security/certificate-verification', to: '/reference/v5/security/certificate-verification' }, + { from: '/docs/developers/security/mtls-auth', to: '/reference/v5/security/mtls-authentication' }, // ── Replication / Clustering ─────────────────────────────────────────────── - { from: '/docs/developers/replication', to: '/reference/v4/replication/overview' }, - { 
from: '/docs/developers/replication/sharding', to: '/reference/v4/replication/sharding' }, - { from: '/docs/developers/clustering', to: '/reference/v4/replication/clustering' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/developers/clustering/certificate-management', to: '/reference/v4/security/certificate-management' }, - { from: '/docs/developers/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/clustering/creating-a-cluster-user', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/clustering/things-worth-knowing', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/clustering/subscription-overview', to: '/reference/v4/replication/clustering' }, - { from: '/docs/developers/replication/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, + { from: '/docs/developers/replication', to: '/reference/v5/replication/overview' }, + { from: '/docs/developers/replication/sharding', to: '/reference/v5/replication/sharding' }, + { from: '/docs/developers/clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/certificate-management', to: '/reference/v5/security/certificate-management' }, + { from: '/docs/developers/clustering/enabling-clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/creating-a-cluster-user', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/things-worth-knowing', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/subscription-overview', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/naming-a-node', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/requirements-and-definitions', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/establishing-routes', to: 
'/reference/v5/replication/clustering' }, + { from: '/docs/developers/clustering/managing-subscriptions', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/replication/clustering/enabling-clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/replication/clustering/establishing-routes', to: '/reference/v5/replication/clustering' }, + { from: '/docs/developers/replication/clustering/naming-a-node', to: '/reference/v5/replication/clustering' }, + { + from: '/docs/developers/replication/clustering/requirements-and-definitions', + to: '/reference/v5/replication/clustering', + }, + { + from: '/docs/developers/replication/clustering/certificate-management', + to: '/reference/v5/security/certificate-management', + }, + { + from: '/docs/developers/replication/clustering/managing-subscriptions', + to: '/reference/v5/replication/clustering', + }, + { from: '/docs/configuration/clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/cluster-setup', to: '/reference/v5/replication/clustering' }, + { from: '/docs/clustering/creating-a-cluster-user', to: '/reference/v5/replication/clustering' }, + { from: '/docs/clustering/things-worth-knowing', to: '/reference/v5/replication/clustering' }, // ── REST / Real-time ──────────────────────────────────────────────────────── - { from: '/docs/developers/rest', to: '/reference/v4/rest/overview' }, - { from: '/docs/developers/real-time', to: '/reference/v4/rest/websockets' }, - { from: '/docs/developers/sql-guide', to: '/reference/v4/database/sql' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/developers/sql-guide/functions', to: '/reference/v4/database/sql' }, - { from: '/docs/developers/sql-guide/date-functions', to: '/reference/v4/database/sql' }, - { from: '/docs/developers/sql-guide/features-matrix', to: '/reference/v4/database/sql' }, - { from: 
'/docs/developers/sql-guide/json-search', to: '/reference/v4/database/sql' }, - { from: '/docs/developers/sql-guide/sql-geospatial-functions', to: '/reference/v4/database/sql' }, - { from: '/docs/developers/sql-guide/reserved-word', to: '/reference/v4/database/sql' }, + { from: '/docs/developers/rest', to: '/reference/v5/rest/overview' }, + { from: '/docs/developers/real-time', to: '/reference/v5/rest/websockets' }, + { from: '/docs/rest-api', to: '/reference/v5/rest/overview' }, + { from: '/docs/graphql/overview', to: '/reference/v5/graphql-querying/overview' }, + { from: '/docs/developers/sql-guide', to: '/reference/v5/database/sql' }, + + { from: '/docs/developers/sql-guide/functions', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/sql-guide/date-functions', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/sql-guide/features-matrix', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/sql-guide/json-search', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/sql-guide/sql-geospatial-functions', to: '/reference/v5/database/sql' }, + { from: '/docs/developers/sql-guide/reserved-word', to: '/reference/v5/database/sql' }, + { from: '/docs/sql-support', to: '/reference/v5/database/sql' }, + { from: '/docs/sql-guide/insert', to: '/reference/v5/database/sql' }, + { from: '/docs/sql-guide/select', to: '/reference/v5/database/sql' }, + { from: '/docs/sql-guide/datatypes', to: '/reference/v5/database/sql' }, + { from: '/docs/sql-guide/sql-geospatial-functions/geoconvert', to: '/reference/v5/database/sql' }, + + // ── Database / Resources ────────────────────────────────────────────────── + { from: '/docs/data-loading', to: '/reference/v5/database/data-loader' }, + { from: '/docs/replication', to: '/reference/v5/replication/overview' }, + { from: '/docs/resources', to: '/reference/v5/resources/overview' }, + { from: '/docs/performance', to: '/reference/v5' }, + { from: '/docs/performance-guide/data-modeling', to: 
'/reference/v5' }, // ── Configuration ───────────────────────────────────────────────────────── - { from: '/docs/deployments/configuration', to: '/reference/v4/configuration/overview' }, + { from: '/docs/deployments/configuration', to: '/reference/v5/configuration/overview' }, // ── CLI ─────────────────────────────────────────────────────────────────── - { from: '/docs/deployments/harper-cli', to: '/reference/v4/cli/overview' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/deployments/harperdb-cli', to: '/reference/v4/cli/overview' }, - { from: '/docs/administration/harperdb-cli', to: '/reference/v4/cli/overview' }, - - // ── Install / Upgrade (no equivalent page in /reference/v4/) ────────────── - // These remain as self-referential paths that may still exist on the live site. - // LOW TRAFFIC (<10 views for most subpaths): + { from: '/docs/deployments/harper-cli', to: '/reference/v5/cli/overview' }, + { from: '/docs/deployments/harperdb-cli', to: '/reference/v5/cli/overview' }, + { from: '/docs/administration/harperdb-cli', to: '/reference/v5/cli/overview' }, + { from: '/docs/deployments/install-harper/harper-cli', to: '/reference/v5/cli/overview' }, + { from: '/docs/cli', to: '/reference/v5/cli/overview' }, + + // ── Install / Upgrade (no equivalent page in /reference/) ───────────────── + { from: '/docs/deployments/install-harper', to: '/learn/getting-started/install-and-connect-harper' }, + { from: '/docs/deployments/install-harper/linux', to: '/learn/getting-started/install-and-connect-harper' }, { from: '/docs/deployments/upgrade-hdb-instance', to: '/learn' }, { from: '/docs/administration/upgrade-hdb-instance', to: '/learn' }, - // ── Harper Cloud → Legacy ───────────────────────────────────────────────── + // ── Harper Cloud → Legacy (v4 only, no v5 equivalent) ───────────────────── { from: '/docs/deployments/harper-cloud', to: '/reference/v4/legacy/cloud' }, - // LOW TRAFFIC (<10 views each): { from: '/docs/deployments/harperdb-cloud', to: 
'/reference/v4/legacy/cloud' }, + { from: '/docs/deployments/harper-cloud/alarms', to: '/reference/v4/legacy/cloud' }, + { from: '/docs/deployments/harper-cloud/iops-impact', to: '/reference/v4/legacy/cloud' }, + { from: '/docs/deployments/harper-cloud/verizon-5g-wavelength-instances', to: '/reference/v4/legacy/cloud' }, + { from: '/docs/deployments/harper-cloud/instance-size-hardware-specs', to: '/reference/v4/legacy/cloud' }, // ── Studio ──────────────────────────────────────────────────────────────── - { from: '/docs/administration/harper-studio', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/create-account', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/login-password-reset', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/instances', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/instance-metrics', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/instance-configuration', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/manage-databases-browse-data', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/manage-instance-users', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/manage-applications', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/enable-mixed-content', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/query-instance-data', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/organizations', to: '/reference/v4/studio/overview' }, - { from: '/docs/administration/harper-studio/manage-instance-roles', to: '/reference/v4/studio/overview' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/administration/harperdb-studio/', to: '/reference/v4/studio/overview' }, - { 
from: '/docs/administration/harperdb-studio/manage-applications', to: '/reference/v4/studio/overview' }, + { from: '/docs/administration/harper-studio', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/create-account', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/login-password-reset', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/instances', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/instance-metrics', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/instance-configuration', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-databases-browse-data', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-instance-users', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-applications', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/enable-mixed-content', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/query-instance-data', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/organizations', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harper-studio/manage-instance-roles', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-applications', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/instances', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/organizations', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/create-account', to: '/reference/v5/studio/overview' }, + { from: 
'/docs/administration/harperdb-studio/create-an-account', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/login-password-reset', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/instance-configuration', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/instance-metrics', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/instance-example-code', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-databases-browse-data', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-schemas-browse-data', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-instance-users', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-instance-roles', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-charts', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-replication', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-clustering', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/manage-functions', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/enable-mixed-content', to: '/reference/v5/studio/overview' }, + { from: '/docs/administration/harperdb-studio/query-instance-data', to: '/reference/v5/studio/overview' }, // ── Logging ─────────────────────────────────────────────────────────────── - { from: '/docs/administration/logging', to: '/reference/v4/logging/overview' }, - { from: '/docs/administration/logging/standard-logging', to: '/reference/v4/logging/overview' }, - { from: '/docs/administration/logging/audit-logging', to: '/reference/v4/logging/overview' }, - { from: 
'/docs/administration/logging/transaction-logging', to: '/reference/v4/logging/overview' }, + { from: '/docs/administration/logging', to: '/reference/v5/logging/overview' }, + { from: '/docs/administration/logging/standard-logging', to: '/reference/v5/logging/overview' }, + { from: '/docs/administration/logging/audit-logging', to: '/reference/v5/logging/overview' }, + { from: '/docs/administration/logging/transaction-logging', to: '/reference/v5/logging/overview' }, // ── Administration: other ───────────────────────────────────────────────── - { from: '/docs/administration/cloning', to: '/reference/v4/replication/overview' }, - { from: '/docs/administration/compact', to: '/reference/v4/database/compaction' }, - { from: '/docs/administration/jobs', to: '/reference/v4/database/jobs' }, + { from: '/docs/administration/cloning', to: '/reference/v5/replication/overview' }, + { from: '/docs/administration/compact', to: '/reference/v5/database/compaction' }, + { from: '/docs/administration/jobs', to: '/reference/v5/database/jobs' }, // ── Old /docs/reference/* ───────────────────────────────────────────────── - { from: '/docs/reference', to: '/reference/v4' }, - { from: '/docs/reference/globals', to: '/reference/v4/components/javascript-environment' }, - { from: '/docs/reference/resources', to: '/reference/v4/resources/overview' }, - { from: '/docs/reference/resources/instance-binding', to: '/reference/v4/resources/resource-api' }, - { from: '/docs/reference/resources/migration', to: '/reference/v4/database/data-loader' }, - { from: '/docs/reference/resources/query-optimization', to: '/reference/v4/resources/query-optimization' }, - { from: '/docs/reference/components', to: '/reference/v4/components/overview' }, - { from: '/docs/reference/components/built-in-extensions', to: '/reference/v4/components/extension-api' }, - { from: '/docs/reference/components/extensions', to: '/reference/v4/components/extension-api' }, - { from: '/docs/reference/components/plugins', to: 
'/reference/v4/components/plugin-api' }, - { from: '/docs/reference/components/applications', to: '/reference/v4/components/applications' }, - { from: '/docs/reference/components/configuration', to: '/reference/v4/components/overview' }, - { from: '/docs/reference/analytics', to: '/reference/v4/analytics/overview' }, - { from: '/docs/reference/dynamic-schema', to: '/reference/v4/database/schema' }, - { from: '/docs/reference/data-types', to: '/reference/v4/database/schema' }, - { from: '/docs/reference/blob', to: '/reference/v4/database/schema' }, - { from: '/docs/reference/transactions', to: '/reference/v4/database/transaction' }, - { from: '/docs/reference/graphql', to: '/reference/v4/graphql-querying/overview' }, - { from: '/docs/reference/content-types', to: '/reference/v4/rest/content-types' }, - { from: '/docs/reference/headers', to: '/reference/v4/rest/headers' }, - { from: '/docs/reference/roles', to: '/reference/v4/users-and-roles/overview' }, - { from: '/docs/reference/storage-algorithm', to: '/reference/v4/database/storage-algorithm' }, - { from: '/docs/reference/limits', to: '/reference/v4/database/schema' }, - { from: '/docs/reference/architecture', to: '/reference/v4' }, - { from: '/docs/reference/clustering', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/enabling-clustering', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/establishing-routes', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/subscription-overview', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/managing-subscriptions', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/things-worth-knowing', to: '/reference/v4/replication/clustering' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/reference/clustering/certificate-management', to: '/reference/v4/security/certificate-management' }, - { from: 
'/docs/reference/clustering/creating-a-cluster-user', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/clustering/naming-a-node', to: '/reference/v4/replication/clustering' }, - { from: '/docs/reference/sql-guide', to: '/reference/v4/database/sql' }, - { from: '/docs/reference/sql-guide/json-search', to: '/reference/v4/database/sql' }, - // LOW TRAFFIC (<10 views): - { from: '/docs/reference/sql-guide/date-functions', to: '/reference/v4/database/sql' }, - { from: '/docs/reference/sql-guide/functions', to: '/reference/v4/database/sql' }, - { from: '/docs/reference/sql-guide/sql-geospatial-functions', to: '/reference/v4/database/sql' }, - - // ── Old /technical-details/reference/* (pre-v4 paths) ──────────────────── - // LOW TRAFFIC (<10 views): - { from: '/technical-details/reference/resources', to: '/reference/v4/resources/overview' }, + { from: '/docs/reference', to: '/reference/v5' }, + { from: '/docs/reference/globals', to: '/reference/v5/components/javascript-environment' }, + { from: '/docs/reference/resources', to: '/reference/v5/resources/overview' }, + { from: '/docs/reference/resources/instance-binding', to: '/reference/v5/resources/resource-api' }, + { from: '/docs/reference/resources/migration', to: '/reference/v5/database/data-loader' }, + { from: '/docs/reference/resources/query-optimization', to: '/reference/v5/resources/query-optimization' }, + { from: '/docs/reference/components', to: '/reference/v5/components/overview' }, + { from: '/docs/reference/components/built-in-extensions', to: '/reference/v5/components/extension-api' }, + { from: '/docs/reference/components/extensions', to: '/reference/v5/components/extension-api' }, + { from: '/docs/reference/components/plugins', to: '/reference/v5/components/plugin-api' }, + { from: '/docs/reference/components/applications', to: '/reference/v5/components/applications' }, + { from: '/docs/reference/components/configuration', to: '/reference/v5/components/overview' }, + { from: 
'/docs/reference/analytics', to: '/reference/v5/analytics/overview' }, + { from: '/docs/reference/dynamic-schema', to: '/reference/v5/database/schema' }, + { from: '/docs/reference/data-types', to: '/reference/v5/database/schema' }, + { from: '/docs/reference/blob', to: '/reference/v5/database/schema' }, + { from: '/docs/reference/transactions', to: '/reference/v5/database/transaction' }, + { from: '/docs/reference/graphql', to: '/reference/v5/graphql-querying/overview' }, + { from: '/docs/reference/content-types', to: '/reference/v5/rest/content-types' }, + { from: '/docs/reference/headers', to: '/reference/v5/rest/headers' }, + { from: '/docs/reference/roles', to: '/reference/v5/users-and-roles/overview' }, + { from: '/docs/reference/storage-algorithm', to: '/reference/v5/database/storage-algorithm' }, + { from: '/docs/reference/limits', to: '/reference/v5/database/schema' }, + { from: '/docs/reference/architecture', to: '/reference/v5' }, + { from: '/docs/reference/clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/enabling-clustering', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/establishing-routes', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/subscription-overview', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/managing-subscriptions', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/things-worth-knowing', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/certificate-management', to: '/reference/v5/security/certificate-management' }, + { from: '/docs/reference/clustering/creating-a-cluster-user', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/naming-a-node', to: '/reference/v5/replication/clustering' }, + { from: '/docs/reference/clustering/requirements-and-definitions', to: 
'/reference/v5/replication/clustering' }, + { from: '/docs/reference/sql-guide', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/json-search', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/date-functions', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/functions', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/sql-geospatial-functions', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/reserved-word', to: '/reference/v5/database/sql' }, + { from: '/docs/reference/sql-guide/features-matrix', to: '/reference/v5/database/sql' }, + + // ── Old /docs/reference/* (continued) ───────────────────────────────────── + { from: '/docs/reference/rest', to: '/reference/v5/rest/overview' }, + { from: '/docs/reference/command-line-interface', to: '/reference/v5/cli/overview' }, + { from: '/docs/reference/configuration-file', to: '/reference/v5/configuration/overview' }, + { from: '/docs/reference/security/roles-and-permissions', to: '/reference/v5/users-and-roles/overview' }, + { from: '/docs/reference/Applications/defining-roles', to: '/reference/v5/users-and-roles/overview' }, + { from: '/docs/reference/api/roles/add-role', to: '/reference/v5/users-and-roles/operations' }, + + // ── Old /technical-details/reference/* (pre-v4 paths, no version prefix) ── + { from: '/technical-details/reference/resources', to: '/reference/v5/resources/overview' }, + { from: '/docs/technical-details/reference/resource', to: '/reference/v5/resources/overview' }, // ── Old /docs/administration/administration ──────────────────────────────── - // LOW TRAFFIC (<10 views): - { from: '/docs/administration/administration', to: '/reference/v4' }, - { from: '/docs/administration', to: '/reference/v4' }, - { from: '/docs/deployments', to: '/reference/v4' }, + { from: '/docs/administration/administration', to: '/reference/v5' }, + { from: '/docs/administration', to: '/reference/v5' }, 
+ { from: '/docs/deployments', to: '/reference/v5' }, // ── Release notes ───────────────────────────────────────────────────────── - // Only paths seen in pageview data (Oct 2025–Feb 2026). The old docs embedded - // release notes under /docs/technical-details/release-notes/ using a dot-separated - // version name format (e.g. "4.tucker" instead of "v4-tucker"). + // Only non-versioned paths seen in pageview data. { from: '/docs/technical-details/release-notes', to: '/release-notes' }, - // LOW TRAFFIC (<16 views each): - { from: '/docs/4.3/technical-details/release-notes/4.tucker/2.1.1', to: '/release-notes/v2-penny/2.1.1' }, - { from: '/docs/4.3/technical-details/release-notes/4.tucker/1.3.1', to: '/release-notes/v1-alby/1.3.1' }, - { from: '/docs/4.3/technical-details/release-notes/4.tucker/3.0.0', to: '/release-notes/v3-monkey/3.0.0' }, + { from: '/docs/release-notes/4.tucker/4.0.3', to: '/release-notes' }, + { from: '/docs/release-notes/4.tucker/4.0.5', to: '/release-notes' }, + { from: '/docs/release-notes/1.alby', to: '/release-notes' }, + { from: '/docs/release-notes/1.alby/1.2.0', to: '/release-notes' }, + { from: '/docs/release-notes/1.alby/1.3.0', to: '/release-notes' }, + { from: '/docs/release-notes/2.penny/2.2.0', to: '/release-notes' }, + { from: '/docs/release-notes/2.penny/2.3.1', to: '/release-notes' }, + { from: '/docs/release-notes/3.monkey', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.3.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.3.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.alby', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.alby/1.2.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.alby/1.3.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/1.alby/1.3.1', to: '/release-notes' }, + { from: 
'/docs/technical-details/release-notes/4.tucker/2.1.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.2.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.2.3', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.3.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.penny', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.penny/2.2.2', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.penny/2.2.3', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.penny/2.3.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/2.penny/2.3.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.0.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.2', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.3', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.4', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.1.5', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.2.0', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.2.1', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.monkey', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.monkey/3.1.2', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/3.monkey/3.1.3', to: '/release-notes' }, + { from: 
'/docs/technical-details/release-notes/4.tucker/4.tucker', to: '/release-notes' }, + { from: '/docs/technical-details/release-notes/4.tucker/4.alby/1.3.0', to: '/release-notes' }, + + // ── Misc old paths (pre-v4, no /docs prefix) ────────────────────────────── + { from: '/developers/applications', to: '/reference/v5/components/applications' }, + { from: '/developers/components/built-in', to: '/reference/v5/components/extension-api' }, + { from: '/technical-details/reference/globals', to: '/reference/v5/components/javascript-environment' }, + { from: '/harperdb-4.2-pre-release/getting-started', to: '/learn' }, + { from: '/harperdb-4.3-pre-release/developers/rest', to: '/reference/v5/rest/overview' }, + { from: '/docs/api/ops-api', to: '/reference/v5/operations-api/overview' }, + + // ── Learn ───────────────────────────────────────────────────────────────── + { from: '/learn/developers/coming-soon', to: '/learn' }, + + // ── Fabric ──────────────────────────────────────────────────────────────── + { from: '/fabric/rest-api', to: '/fabric' }, + { from: '/fabric/functions', to: '/fabric' }, ]; +export const redirects: RedirectRule[] = [...currentRedirects, ...historicRedirects]; + // ─── Wildcard / dynamic redirects ──────────────────────────────────────────── // Called by Docusaurus for every existing page path to generate inbound redirects. 
diff --git a/scripts/harper-docs-analytics.csv b/scripts/harper-docs-analytics.csv index 3c4d9c37..859b467d 100644 --- a/scripts/harper-docs-analytics.csv +++ b/scripts/harper-docs-analytics.csv @@ -1461,7 +1461,6 @@ Page path,Views,Active users,Views per active user,Average engagement time per a /docs/5.0/migration-guide,1,1,1,1,5,0,0 /docs/add-ons-and-sdks/google-data-studio,1,1,1,0,4,0,0 /docs/administration/edge,1,1,1,2,5,0,0 -"/docs/administration/harperdb-studio""",1,1,1,0,3,0,0 /docs/administration/harperdb-studio/create-an-account,1,1,1,1,5,0,0 /docs/administration/harperdb-studio/enable-mixed-content,1,1,1,1,5,0,0 /docs/administration/harperdb-studio/instance-configuration,1,1,1,0,4,0,0 diff --git a/scripts/pageview-data-test.js b/scripts/pageview-data-test.js new file mode 100644 index 00000000..2e357bf1 --- /dev/null +++ b/scripts/pageview-data-test.js @@ -0,0 +1,215 @@ +#!/usr/bin/env node +/** + * Pageview data redirect/availability test + * + * Reads paths from the GA pageview CSV and checks each one against a running + * Docusaurus server (default: http://localhost:3000). 
+ * + * Run after: + * npm run build + * npm run serve (in a separate terminal) + * + * Usage: + * node scripts/pageview-data-test.js [base-url] [--only-problems] [--min-views N] + * + * Options: + * base-url Server to test against (default: http://localhost:3000) + * --only-problems Only print 404s and unexpected results, not 200s/3xxs + * --min-views N Only test paths with at least N views (default: 1) + * + * Examples: + * node scripts/pageview-data-test.js + * node scripts/pageview-data-test.js http://localhost:3000 --only-problems + * node scripts/pageview-data-test.js http://localhost:3000 --min-views 10 --only-problems + */ + +const fs = require('node:fs'); +const path = require('node:path'); +const readline = require('node:readline'); + +// ── Parse CLI args ──────────────────────────────────────────────────────────── +const args = process.argv.slice(2); +const baseUrl = args.find((a) => a.startsWith('http')) ?? 'http://localhost:3000'; +const onlyProblems = args.includes('--only-problems'); +const minViewsArg = args.find((a) => a.startsWith('--min-views')); +const minViews = minViewsArg ? parseInt(minViewsArg.split('=')[1] ?? 
args[args.indexOf(minViewsArg) + 1], 10) : 1; + +const CSV_PATH = path.join(__dirname, 'harper-docs-analytics.csv'); +const CONCURRENCY = 10; // simultaneous requests + +// ── Parse CSV ───────────────────────────────────────────────────────────────── +async function parseCsv(filePath) { + const entries = []; + const rl = readline.createInterface({ input: fs.createReadStream(filePath) }); + + let headerParsed = false; + let pathCol = -1; + let viewsCol = -1; + + for await (const line of rl) { + // Skip comment lines + if (line.startsWith('#') || line.trim() === '') continue; + + const cols = line.split(','); + + if (!headerParsed) { + pathCol = cols.indexOf('Page path'); + viewsCol = cols.indexOf('Views'); + headerParsed = true; + continue; + } + + const pagePath = cols[pathCol]?.trim(); + const views = parseInt(cols[viewsCol]?.trim(), 10); + + if (!pagePath || isNaN(views)) continue; + if (views < minViews) continue; + + // Skip non-doc paths that aren't relevant to redirect testing + // (robots.txt, search, etc.) + if (pagePath === '/robots.txt' || pagePath === '/search') continue; + + entries.push({ path: pagePath, views }); + } + + // Sort by views descending so highest-traffic paths appear first in output + return entries.sort((a, b) => b.views - a.views); +} + +// ── Check a single path ─────────────────────────────────────────────────────── +async function checkPath(pagePath) { + const url = `${baseUrl}${pagePath}`; + try { + const res = await fetch(url, { + method: 'GET', + redirect: 'manual', // don't follow redirects — we want to see the 3xx + }); + + let finalStatus = res.status; + let redirectTarget = null; + + if (res.status >= 300 && res.status < 400) { + redirectTarget = res.headers.get('location') ?? 
'(no location header)'; + } + + return { path: pagePath, status: finalStatus, redirectTarget, error: null }; + } catch (err) { + return { path: pagePath, status: null, redirectTarget: null, error: err.message }; + } +} + +// ── Run checks with concurrency limit ──────────────────────────────────────── +async function runChecks(entries) { + const results = []; + const total = entries.length; + let completed = 0; + + // Process in batches of CONCURRENCY + for (let i = 0; i < entries.length; i += CONCURRENCY) { + const batch = entries.slice(i, i + CONCURRENCY); + const batchResults = await Promise.all( + batch.map(({ path: p, views }) => checkPath(p).then((r) => ({ ...r, views }))) + ); + results.push(...batchResults); + completed += batch.length; + process.stderr.write(`\rProgress: ${completed}/${total}`); + } + process.stderr.write('\n'); + + return results; +} + +// ── Print results ───────────────────────────────────────────────────────────── +function printResults(results) { + const counts = { ok: 0, redirect: 0, notFound: 0, error: 0, other: 0 }; + + const notFound = []; + const errors = []; + const redirects = []; + const ok = []; + + for (const r of results) { + if (r.error) { + counts.error++; + errors.push(r); + } else if (r.status === 200) { + counts.ok++; + ok.push(r); + } else if (r.status >= 300 && r.status < 400) { + counts.redirect++; + redirects.push(r); + } else if (r.status === 404) { + counts.notFound++; + notFound.push(r); + } else { + counts.other++; + errors.push(r); + } + } + + // ── 404s (most important) ───────────────────────────────────────────────── + if (notFound.length > 0) { + console.log('\n── 404 Not Found ────────────────────────────────────────────'); + for (const r of notFound) { + console.log(` [${r.views} views] ${r.path}`); + } + } + + // ── Connection/fetch errors ─────────────────────────────────────────────── + if (errors.length > 0) { + console.log('\n── Errors ───────────────────────────────────────────────────'); + for 
(const r of errors) { + const detail = r.error ?? `HTTP ${r.status}`; + console.log(` [${r.views} views] ${r.path} → ${detail}`); + } + } + + if (!onlyProblems) { + // ── Redirects ───────────────────────────────────────────────────────── + if (redirects.length > 0) { + console.log('\n── Redirects (3xx) ──────────────────────────────────────────'); + for (const r of redirects) { + console.log(` [${r.views} views] ${r.path} → ${r.redirectTarget} (${r.status})`); + } + } + + // ── 200 OK ──────────────────────────────────────────────────────────── + if (ok.length > 0) { + console.log('\n── 200 OK ───────────────────────────────────────────────────'); + for (const r of ok) { + console.log(` [${r.views} views] ${r.path}`); + } + } + } + + // ── Summary ─────────────────────────────────────────────────────────────── + console.log('\n── Summary ──────────────────────────────────────────────────'); + console.log(` 200 OK: ${counts.ok}`); + console.log(` 3xx redirect:${counts.redirect}`); + console.log(` 404 not found:${counts.notFound}`); + console.log(` errors: ${counts.error}`); + console.log(` total tested:${results.length}`); + if (minViews > 1) console.log(` (paths with >= ${minViews} views only)`); + + if (notFound.length > 0 || errors.length > 0) { + process.exitCode = 1; + } +} + +// ── Main ────────────────────────────────────────────────────────────────────── +async function main() { + console.log(`Testing against: ${baseUrl}`); + console.log(`Min views filter: ${minViews}`); + console.log(`CSV: ${CSV_PATH}\n`); + + const entries = await parseCsv(CSV_PATH); + console.log(`Paths to test: ${entries.length}`); + + const results = await runChecks(entries); + printResults(results); +} + +main().catch((err) => { + console.error('Fatal:', err); + process.exit(1); +}); diff --git a/scripts/postbuild.js b/scripts/postbuild.js deleted file mode 100644 index c1c35ef0..00000000 --- a/scripts/postbuild.js +++ /dev/null @@ -1,118 +0,0 @@ -const path = require('node:path'); 
-const fs = require('node:fs/promises'); - -// Copy /myPath.html to /myPath/index.html -// This ensures both URL patterns work: /myPath and /myPath/ -async function generateIndexHtmlFiles(outDir) { - console.log('Post-build: Creating index.html files from hoisted pages...'); - - // Walk through all directories recursively - async function* walkDirs(dir) { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - for (const dirent of dirents) { - if (dirent.isDirectory()) { - const res = path.resolve(dir, dirent.name); - yield res; - yield* walkDirs(res); - } - } - } - - const processedFiles = []; - - for await (const dirPath of walkDirs(outDir)) { - // Check if there's already an index.html in this directory - const indexPath = path.join(dirPath, 'index.html'); - try { - await fs.stat(indexPath); - // index.html exists, skip this directory - continue; - } catch { - // No index.html, continue checking - } - - // Check if there's a sibling HTML file with the same name as the directory - const dirName = path.basename(dirPath); - const siblingHtmlPath = path.join(path.dirname(dirPath), `${dirName}.html`); - - try { - await fs.stat(siblingHtmlPath); - // Sibling HTML file exists, copy it as index.html - await fs.copyFile(siblingHtmlPath, indexPath); - processedFiles.push(`${dirName}.html → ${dirName}/index.html`); - } catch { - // No sibling HTML file, skip - } - } - - if (processedFiles.length > 0) { - console.log(`Post-build: Created ${processedFiles.length} index.html files`); - // Uncomment to see details: - // processedFiles.forEach(f => console.log(` - ${f}`)); - } else { - console.log('Post-build: No index.html files needed'); - } -} - -// Copy redirect index.html files to .html ONLY for old release notes paths -// This ensures redirects work with simple HTTP servers like `npm run serve` -// Example: docs/technical-details/release-notes/4.tucker/4.4.0/index.html -// -> docs/technical-details/release-notes/4.tucker/4.4.0.html -async function 
generateReleaseNotesRedirectHtmlFiles(outDir) { - console.log('Post-build: Creating .html redirect files for old release notes paths...'); - - const redirectBase = path.join(outDir, 'docs', 'technical-details', 'release-notes'); - - try { - await fs.stat(redirectBase); - } catch { - console.log('Post-build: No release notes redirects found, skipping'); - return; - } - - // Walk through all directories recursively - async function* walkDirs(dir) { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - for (const dirent of dirents) { - if (dirent.isDirectory()) { - const res = path.resolve(dir, dirent.name); - yield res; - yield* walkDirs(res); - } - } - } - - const processedFiles = []; - - for await (const dirPath of walkDirs(redirectBase)) { - // Check if this directory has an index.html redirect file - const indexPath = path.join(dirPath, 'index.html'); - try { - const content = await fs.readFile(indexPath, 'utf8'); - // Check if it's a redirect file (contains meta refresh) - if (content.includes('meta http-equiv="refresh"')) { - // Create a sibling .html file with the same content - const dirName = path.basename(dirPath); - const siblingHtmlPath = path.join(path.dirname(dirPath), `${dirName}.html`); - await fs.copyFile(indexPath, siblingHtmlPath); - processedFiles.push(`${dirName}/index.html → ${dirName}.html`); - } - } catch { - // No index.html or other error, skip - } - } - - if (processedFiles.length > 0) { - console.log(`Post-build: Created ${processedFiles.length} .html redirect files`); - // Uncomment to see details: - // processedFiles.forEach(f => console.log(` - ${f}`)); - } else { - console.log('Post-build: No .html redirect files needed'); - } -} - -// Run the post-processing -const buildDir = path.join(__dirname, '..', 'build'); -generateIndexHtmlFiles(buildDir) - .then(() => generateReleaseNotesRedirectHtmlFiles(buildDir)) - .catch(console.error); diff --git a/sidebarsReference.ts b/sidebarsReference.ts index 45253956..878c3d7f 100644 
--- a/sidebarsReference.ts +++ b/sidebarsReference.ts @@ -3,8 +3,499 @@ import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'; const sidebars: SidebarsConfig = { referenceSidebar: [ { - type: 'autogenerated', - dirName: '.', + type: 'doc', + id: 'index', + label: 'Welcome', + }, + { + type: 'category', + label: 'CLI', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'cli/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'cli/commands', + label: 'Commands', + }, + { + type: 'doc', + id: 'cli/operations-api-commands', + label: 'Operations API Commands', + }, + { + type: 'doc', + id: 'cli/authentication', + label: 'Authentication', + }, + ], + }, + { + type: 'category', + label: 'Configuration', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'configuration/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'configuration/options', + label: 'Options', + }, + { + type: 'doc', + id: 'configuration/operations', + label: 'Operations', + }, + ], + }, + { + type: 'category', + label: 'Environment Variables', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'environment-variables/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Static Files', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'static-files/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'HTTP', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'http/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'http/configuration', + label: 'Configuration', + }, + { + type: 'doc', + id: 'http/api', + label: 'API', + }, + { + type: 'doc', + id: 'http/tls', + label: 'TLS', + }, + ], + }, + { + type: 'category', + label: 'REST', + collapsible: false, + className: 
'learn-category-header', + items: [ + { + type: 'doc', + id: 'rest/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'rest/content-types', + label: 'Content Types', + }, + { + type: 'doc', + id: 'rest/headers', + label: 'Headers', + }, + { + type: 'doc', + id: 'rest/querying', + label: 'Querying', + }, + { + type: 'doc', + id: 'rest/websockets', + label: 'WebSockets', + }, + { + type: 'doc', + id: 'rest/server-sent-events', + label: 'Server Sent Events', + }, + ], + }, + { + type: 'category', + label: 'Logging', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'logging/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'logging/configuration', + label: 'Configuration', + }, + { + type: 'doc', + id: 'logging/api', + label: 'API', + }, + { + type: 'doc', + id: 'logging/operations', + label: 'Operations', + }, + ], + }, + { + type: 'category', + label: 'Analytics', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'analytics/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'analytics/operations', + label: 'Operations', + }, + ], + }, + { + type: 'category', + label: 'MQTT', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'mqtt/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'mqtt/configuration', + label: 'Configuration', + }, + ], + }, + { + type: 'category', + label: 'GraphQL Querying', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'graphql-querying/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Studio', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'studio/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Fastify Routes', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 
'fastify-routes/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Security', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'security/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'security/configuration', + label: 'Configuration', + }, + { + type: 'doc', + id: 'security/basic-authentication', + label: 'Basic Authentication', + }, + { + type: 'doc', + id: 'security/jwt-authentication', + label: 'JWT Authentication', + }, + { + type: 'doc', + id: 'security/mtls-authentication', + label: 'mTLS Authentication', + }, + { + type: 'doc', + id: 'security/certificate-management', + label: 'Certificate Management', + }, + { + type: 'doc', + id: 'security/certificate-verification', + label: 'Certificate Verification', + }, + { + type: 'doc', + id: 'security/api', + label: 'API', + }, + ], + }, + { + type: 'category', + label: 'Users & Roles', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'users-and-roles/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'users-and-roles/configuration', + label: 'Configuration', + }, + { + type: 'doc', + id: 'users-and-roles/operations', + label: 'Operations', + }, + ], + }, + { + type: 'category', + label: 'Database', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'database/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'database/schema', + label: 'Schema', + }, + { + type: 'doc', + id: 'database/api', + label: 'API', + }, + { + type: 'doc', + id: 'database/data-loader', + label: 'Data Loader', + }, + { + type: 'doc', + id: 'database/storage-algorithm', + label: 'Storage Algorithm', + }, + { + type: 'doc', + id: 'database/jobs', + label: 'Jobs', + }, + { + type: 'doc', + id: 'database/system-tables', + label: 'System Tables', + }, + { + type: 'doc', + id: 'database/compaction', + label: 'Compaction', + }, + { + type: 'doc', 
+ id: 'database/transaction', + label: 'Transaction Logging', + }, + { + type: 'doc', + id: 'database/sql', + label: 'SQL', + }, + ], + }, + { + type: 'category', + label: 'Resources', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'resources/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'resources/resource-api', + label: 'Resource API', + }, + { + type: 'doc', + id: 'resources/query-optimization', + label: 'Query Optimization', + }, + ], + }, + { + type: 'category', + label: 'Components', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'components/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'components/applications', + label: 'Applications', + }, + { + type: 'doc', + id: 'components/extension-api', + label: 'Extension API', + }, + { + type: 'doc', + id: 'components/plugin-api', + label: 'Plugin API', + }, + { + type: 'doc', + id: 'components/javascript-environment', + label: 'JavaScript Environment', + }, + ], + }, + { + type: 'category', + label: 'Replication', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'replication/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'replication/clustering', + label: 'Clustering', + }, + { + type: 'doc', + id: 'replication/sharding', + label: 'Sharding', + }, + ], + }, + { + type: 'category', + label: 'Operations API', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'operations-api/overview', + label: 'Overview', + }, + { + type: 'doc', + id: 'operations-api/operations', + label: 'Operations', + }, + ], + }, + { + type: 'category', + label: 'Legacy', + collapsible: false, + className: 'learn-category-header', + items: [ + { + type: 'doc', + id: 'legacy/cloud', + label: 'Harper Cloud', + }, + { + type: 'doc', + id: 'legacy/custom-functions', + label: 'Custom Functions', + }, + ], }, ], }; 
From 7c62241afc25a184a1a0c7a82adc0c7acec12272 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Mon, 30 Mar 2026 20:50:46 -0600 Subject: [PATCH 39/51] remove paginator for reference section --- docusaurus.config.ts | 22 +++++++++---------- src/theme/DocItem/Paginator.tsx | 8 +++++++ .../DocsVersionDropdownNavbarItem.tsx | 11 +++------- 3 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 src/theme/DocItem/Paginator.tsx diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 609c6c0b..8c8987a1 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -83,17 +83,17 @@ const config: Config = { path: 'reference', routeBasePath: 'reference', sidebarPath: './sidebarsReference.ts', - editUrl: ({ versionDocsDirPath, docPath }) => { - // For versioned docs: versionDocsDirPath is like 'versioned_docs/version-4' - // For current docs: versionDocsDirPath is 'docs' - if (versionDocsDirPath.startsWith('versioned_docs')) { - // Versioned docs are in versioned_docs/version-X.X/ - return `https://github.com/HarperFast/documentation/blob/main/${versionDocsDirPath}/${docPath}`; - } else { - // Current docs are in the root docs/ directory - return `https://github.com/HarperFast/documentation/blob/main/docs/${docPath}`; - } - }, + // editUrl: ({ versionDocsDirPath, docPath }) => { + // // For versioned docs: versionDocsDirPath is like 'versioned_docs/version-4' + // // For current docs: versionDocsDirPath is 'docs' + // if (versionDocsDirPath.startsWith('versioned_docs')) { + // // Versioned docs are in versioned_docs/version-X.X/ + // return `https://github.com/HarperFast/documentation/blob/main/${versionDocsDirPath}/${docPath}`; + // } else { + // // Current docs are in the root docs/ directory + // return `https://github.com/HarperFast/documentation/blob/main/docs/${docPath}`; + // } + // }, lastVersion: 'current', includeCurrentVersion: false, versions: { diff --git a/src/theme/DocItem/Paginator.tsx b/src/theme/DocItem/Paginator.tsx new file mode 
100644 index 00000000..cdd9e745 --- /dev/null +++ b/src/theme/DocItem/Paginator.tsx @@ -0,0 +1,8 @@ +import React from 'react'; +import DocItemPaginator from '@theme-original/DocItem/Paginator'; +import { useActivePlugin } from '@docusaurus/plugin-content-docs/client'; + +export default function DocItemPaginatorWrapper() { + const activePlugin = useActivePlugin(); + return activePlugin?.pluginId === 'reference' ? null : ; +} diff --git a/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.tsx b/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.tsx index 75c87100..671e8f17 100644 --- a/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.tsx +++ b/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.tsx @@ -1,14 +1,9 @@ import React from 'react'; import DocsVersionDropdownNavbarItem from '@theme-original/NavbarItem/DocsVersionDropdownNavbarItem'; -import { useLocation } from '@docusaurus/router'; +import { useActivePlugin } from '@docusaurus/plugin-content-docs/client'; import type { Props } from '@theme/NavbarItem/DocsVersionDropdownNavbarItem'; -function isNonVersionedPathname(pathname: string) { - return pathname.startsWith('/fabric') || pathname.startsWith('/release-notes') || pathname.startsWith('/learn'); -} - export default function DocsVersionDropdownNavbarItemWrapper(props: Props) { - const location = useLocation(); - - return isNonVersionedPathname(location.pathname) ? null : ; + const activePlugin = useActivePlugin(); + return activePlugin?.pluginId !== 'reference' ? 
null : ; } From 2a4da0cfd9ed6598ddd9af4344360c95e788e214 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 08:40:37 -0600 Subject: [PATCH 40/51] touch up sidebar styles --- .../version-v4-sidebars.json | 375 +++++++++--------- src/css/custom.css | 37 ++ 2 files changed, 225 insertions(+), 187 deletions(-) diff --git a/reference_versioned_sidebars/version-v4-sidebars.json b/reference_versioned_sidebars/version-v4-sidebars.json index 29e72db0..d8f7c280 100644 --- a/reference_versioned_sidebars/version-v4-sidebars.json +++ b/reference_versioned_sidebars/version-v4-sidebars.json @@ -3,110 +3,120 @@ { "type": "doc", "id": "index", - "label": "Welcome" + "label": "Reference", + "className": "reference-index-item" }, { "type": "category", - "label": "CLI", + "label": "Database", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "cli/overview", + "id": "database/overview", "label": "Overview" }, { "type": "doc", - "id": "cli/commands", - "label": "Commands" + "id": "database/schema", + "label": "Schema" }, { "type": "doc", - "id": "cli/operations-api-commands", - "label": "Operations API Commands" + "id": "database/api", + "label": "API" }, { "type": "doc", - "id": "cli/authentication", - "label": "Authentication" - } - ] - }, - { - "type": "category", - "label": "Configuration", - "collapsible": false, - "className": "learn-category-header", - "items": [ + "id": "database/data-loader", + "label": "Data Loader" + }, { "type": "doc", - "id": "configuration/overview", - "label": "Overview" + "id": "database/storage-algorithm", + "label": "Storage Algorithm" }, { "type": "doc", - "id": "configuration/options", - "label": "Options" + "id": "database/jobs", + "label": "Jobs" }, { "type": "doc", - "id": "configuration/operations", - "label": "Operations" - } - ] - }, - { - "type": "category", - "label": "Environment Variables", - "collapsible": false, - "className": 
"learn-category-header", - "items": [ + "id": "database/system-tables", + "label": "System Tables" + }, { "type": "doc", - "id": "environment-variables/overview", - "label": "Overview" + "id": "database/compaction", + "label": "Compaction" + }, + { + "type": "doc", + "id": "database/transaction", + "label": "Transaction Logging" + }, + { + "type": "doc", + "id": "database/sql", + "label": "SQL" } ] }, { "type": "category", - "label": "Static Files", + "label": "Resources", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "static-files/overview", + "id": "resources/overview", "label": "Overview" + }, + { + "type": "doc", + "id": "resources/resource-api", + "label": "Resource API" + }, + { + "type": "doc", + "id": "resources/query-optimization", + "label": "Query Optimization" } ] }, { "type": "category", - "label": "HTTP", + "label": "Components", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "http/overview", + "id": "components/overview", "label": "Overview" }, { "type": "doc", - "id": "http/configuration", - "label": "Configuration" + "id": "components/applications", + "label": "Applications" }, { "type": "doc", - "id": "http/api", - "label": "API" + "id": "components/extension-api", + "label": "Extension API" }, { "type": "doc", - "id": "http/tls", - "label": "TLS" + "id": "components/plugin-api", + "label": "Plugin API" + }, + { + "type": "doc", + "id": "components/javascript-environment", + "label": "JavaScript Environment" } ] }, @@ -114,7 +124,7 @@ "type": "category", "label": "REST", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", @@ -150,104 +160,29 @@ }, { "type": "category", - "label": "Logging", + "label": "HTTP", "collapsible": false, - "className": "learn-category-header", + 
"className": "reference-category-header", "items": [ { "type": "doc", - "id": "logging/overview", + "id": "http/overview", "label": "Overview" }, { "type": "doc", - "id": "logging/configuration", + "id": "http/configuration", "label": "Configuration" }, { "type": "doc", - "id": "logging/api", + "id": "http/api", "label": "API" }, { "type": "doc", - "id": "logging/operations", - "label": "Operations" - } - ] - }, - { - "type": "category", - "label": "Analytics", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "analytics/overview", - "label": "Overview" - }, - { - "type": "doc", - "id": "analytics/operations", - "label": "Operations" - } - ] - }, - { - "type": "category", - "label": "MQTT", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "mqtt/overview", - "label": "Overview" - }, - { - "type": "doc", - "id": "mqtt/configuration", - "label": "Configuration" - } - ] - }, - { - "type": "category", - "label": "GraphQL Querying", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "graphql-querying/overview", - "label": "Overview" - } - ] - }, - { - "type": "category", - "label": "Studio", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "studio/overview", - "label": "Overview" - } - ] - }, - { - "type": "category", - "label": "Fastify Routes", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "fastify-routes/overview", - "label": "Overview" + "id": "http/tls", + "label": "TLS" } ] }, @@ -255,7 +190,7 @@ "type": "category", "label": "Security", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", @@ -303,7 +238,7 @@ "type": "category", "label": "Users & Roles", "collapsible": false, - "className": 
"learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", @@ -324,115 +259,160 @@ }, { "type": "category", - "label": "Database", + "label": "CLI", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "database/overview", + "id": "cli/overview", "label": "Overview" }, { "type": "doc", - "id": "database/schema", - "label": "Schema" - }, - { - "type": "doc", - "id": "database/api", - "label": "API" + "id": "cli/commands", + "label": "Commands" }, { "type": "doc", - "id": "database/data-loader", - "label": "Data Loader" + "id": "cli/operations-api-commands", + "label": "Operations API Commands" }, { "type": "doc", - "id": "database/storage-algorithm", - "label": "Storage Algorithm" - }, + "id": "cli/authentication", + "label": "Authentication" + } + ] + }, + { + "type": "category", + "label": "Configuration", + "collapsible": false, + "className": "reference-category-header", + "items": [ { "type": "doc", - "id": "database/jobs", - "label": "Jobs" + "id": "configuration/overview", + "label": "Overview" }, { "type": "doc", - "id": "database/system-tables", - "label": "System Tables" + "id": "configuration/options", + "label": "Options" }, { "type": "doc", - "id": "database/compaction", - "label": "Compaction" - }, + "id": "configuration/operations", + "label": "Operations" + } + ] + }, + { + "type": "category", + "label": "Operations API", + "collapsible": false, + "className": "reference-category-header", + "items": [ { "type": "doc", - "id": "database/transaction", - "label": "Transaction Logging" + "id": "operations-api/overview", + "label": "Overview" }, { "type": "doc", - "id": "database/sql", - "label": "SQL" + "id": "operations-api/operations", + "label": "Operations" } ] }, { "type": "category", - "label": "Resources", + "label": "Logging", "collapsible": false, - "className": "learn-category-header", + "className": 
"reference-category-header", "items": [ { "type": "doc", - "id": "resources/overview", + "id": "logging/overview", "label": "Overview" }, { "type": "doc", - "id": "resources/resource-api", - "label": "Resource API" + "id": "logging/configuration", + "label": "Configuration" }, { "type": "doc", - "id": "resources/query-optimization", - "label": "Query Optimization" + "id": "logging/api", + "label": "API" + }, + { + "type": "doc", + "id": "logging/operations", + "label": "Operations" } ] }, { "type": "category", - "label": "Components", + "label": "Analytics", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "components/overview", + "id": "analytics/overview", "label": "Overview" }, { "type": "doc", - "id": "components/applications", - "label": "Applications" - }, + "id": "analytics/operations", + "label": "Operations" + } + ] + }, + { + "type": "category", + "label": "MQTT", + "collapsible": false, + "className": "reference-category-header", + "items": [ { "type": "doc", - "id": "components/extension-api", - "label": "Extension API" + "id": "mqtt/overview", + "label": "Overview" }, { "type": "doc", - "id": "components/plugin-api", - "label": "Plugin API" - }, + "id": "mqtt/configuration", + "label": "Configuration" + } + ] + }, + { + "type": "category", + "label": "Static Files", + "collapsible": false, + "className": "reference-category-header", + "items": [ { "type": "doc", - "id": "components/javascript-environment", - "label": "JavaScript Environment" + "id": "static-files/overview", + "label": "Overview" + } + ] + }, + { + "type": "category", + "label": "Environment Variables", + "collapsible": false, + "className": "reference-category-header", + "items": [ + { + "type": "doc", + "id": "environment-variables/overview", + "label": "Overview" } ] }, @@ -440,7 +420,7 @@ "type": "category", "label": "Replication", "collapsible": false, - "className": 
"learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", @@ -461,19 +441,40 @@ }, { "type": "category", - "label": "Operations API", + "label": "GraphQL Querying", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", - "id": "operations-api/overview", + "id": "graphql-querying/overview", "label": "Overview" - }, + } + ] + }, + { + "type": "category", + "label": "Studio", + "collapsible": false, + "className": "reference-category-header", + "items": [ { "type": "doc", - "id": "operations-api/operations", - "label": "Operations" + "id": "studio/overview", + "label": "Overview" + } + ] + }, + { + "type": "category", + "label": "Fastify Routes", + "collapsible": false, + "className": "reference-category-header", + "items": [ + { + "type": "doc", + "id": "fastify-routes/overview", + "label": "Overview" } ] }, @@ -481,7 +482,7 @@ "type": "category", "label": "Legacy", "collapsible": false, - "className": "learn-category-header", + "className": "reference-category-header", "items": [ { "type": "doc", diff --git a/src/css/custom.css b/src/css/custom.css index 22610ac3..ac38eb3c 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -58,3 +58,40 @@ code, color: #adb5bd !important; /* Lighter grey for dark mode */ border-bottom-color: #495057; /* Darker underline for dark mode */ } + +/* Reference sidebar index/welcome item */ +.reference-index-item > .menu__link { + font-size: 0.875rem; +} + +/* Compact reference sidebar category headers */ +.reference-category-header > .menu__list-item-collapsible:hover { + background: none; +} + +.reference-category-header > .menu__list-item-collapsible > .menu__link { + color: #6c757d !important; + border-radius: 0; + border-bottom: 1px solid #dee2e6; + padding-bottom: 0.1rem; + margin-bottom: 0.2rem; + cursor: default; + font-weight: 600; + pointer-events: none; + font-size: 0.7rem; + text-transform: uppercase; + 
letter-spacing: 0.05em; +} + +/* Compact item links inside reference categories */ +.reference-category-header > .menu__list .menu__link { + font-size: 0.875rem; + padding-top: 0.2rem; + padding-bottom: 0.2rem; +} + +/* Dark mode for reference category headers */ +[data-theme='dark'] .reference-category-header > .menu__list-item-collapsible > .menu__link { + color: #adb5bd !important; + border-bottom-color: #495057; +} From a7fe14e38acf78535b7095a7b062525a38916b53 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 08:41:19 -0600 Subject: [PATCH 41/51] copy to v5 sidebar --- sidebarsReference.ts | 361 ++++++++++++++++++++++--------------------- 1 file changed, 181 insertions(+), 180 deletions(-) diff --git a/sidebarsReference.ts b/sidebarsReference.ts index 878c3d7f..34c20b97 100644 --- a/sidebarsReference.ts +++ b/sidebarsReference.ts @@ -5,110 +5,120 @@ const sidebars: SidebarsConfig = { { type: 'doc', id: 'index', - label: 'Welcome', + label: 'Reference', + className: 'reference-index-item', }, { type: 'category', - label: 'CLI', + label: 'Database', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'cli/overview', + id: 'database/overview', label: 'Overview', }, { type: 'doc', - id: 'cli/commands', - label: 'Commands', + id: 'database/schema', + label: 'Schema', }, { type: 'doc', - id: 'cli/operations-api-commands', - label: 'Operations API Commands', + id: 'database/api', + label: 'API', }, { type: 'doc', - id: 'cli/authentication', - label: 'Authentication', + id: 'database/data-loader', + label: 'Data Loader', }, - ], - }, - { - type: 'category', - label: 'Configuration', - collapsible: false, - className: 'learn-category-header', - items: [ { type: 'doc', - id: 'configuration/overview', - label: 'Overview', + id: 'database/storage-algorithm', + label: 'Storage Algorithm', }, { type: 'doc', - id: 'configuration/options', - label: 'Options', + id: 'database/jobs', 
+ label: 'Jobs', }, { type: 'doc', - id: 'configuration/operations', - label: 'Operations', + id: 'database/system-tables', + label: 'System Tables', }, - ], - }, - { - type: 'category', - label: 'Environment Variables', - collapsible: false, - className: 'learn-category-header', - items: [ { type: 'doc', - id: 'environment-variables/overview', - label: 'Overview', + id: 'database/compaction', + label: 'Compaction', + }, + { + type: 'doc', + id: 'database/transaction', + label: 'Transaction Logging', + }, + { + type: 'doc', + id: 'database/sql', + label: 'SQL', }, ], }, { type: 'category', - label: 'Static Files', + label: 'Resources', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'static-files/overview', + id: 'resources/overview', label: 'Overview', }, + { + type: 'doc', + id: 'resources/resource-api', + label: 'Resource API', + }, + { + type: 'doc', + id: 'resources/query-optimization', + label: 'Query Optimization', + }, ], }, { type: 'category', - label: 'HTTP', + label: 'Components', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'http/overview', + id: 'components/overview', label: 'Overview', }, { type: 'doc', - id: 'http/configuration', - label: 'Configuration', + id: 'components/applications', + label: 'Applications', }, { type: 'doc', - id: 'http/api', - label: 'API', + id: 'components/extension-api', + label: 'Extension API', }, { type: 'doc', - id: 'http/tls', - label: 'TLS', + id: 'components/plugin-api', + label: 'Plugin API', + }, + { + type: 'doc', + id: 'components/javascript-environment', + label: 'JavaScript Environment', }, ], }, @@ -116,7 +126,7 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'REST', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', @@ -152,104 +162,29 @@ const sidebars: 
SidebarsConfig = { }, { type: 'category', - label: 'Logging', + label: 'HTTP', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'logging/overview', + id: 'http/overview', label: 'Overview', }, { type: 'doc', - id: 'logging/configuration', + id: 'http/configuration', label: 'Configuration', }, { type: 'doc', - id: 'logging/api', + id: 'http/api', label: 'API', }, { type: 'doc', - id: 'logging/operations', - label: 'Operations', - }, - ], - }, - { - type: 'category', - label: 'Analytics', - collapsible: false, - className: 'learn-category-header', - items: [ - { - type: 'doc', - id: 'analytics/overview', - label: 'Overview', - }, - { - type: 'doc', - id: 'analytics/operations', - label: 'Operations', - }, - ], - }, - { - type: 'category', - label: 'MQTT', - collapsible: false, - className: 'learn-category-header', - items: [ - { - type: 'doc', - id: 'mqtt/overview', - label: 'Overview', - }, - { - type: 'doc', - id: 'mqtt/configuration', - label: 'Configuration', - }, - ], - }, - { - type: 'category', - label: 'GraphQL Querying', - collapsible: false, - className: 'learn-category-header', - items: [ - { - type: 'doc', - id: 'graphql-querying/overview', - label: 'Overview', - }, - ], - }, - { - type: 'category', - label: 'Studio', - collapsible: false, - className: 'learn-category-header', - items: [ - { - type: 'doc', - id: 'studio/overview', - label: 'Overview', - }, - ], - }, - { - type: 'category', - label: 'Fastify Routes', - collapsible: false, - className: 'learn-category-header', - items: [ - { - type: 'doc', - id: 'fastify-routes/overview', - label: 'Overview', + id: 'http/tls', + label: 'TLS', }, ], }, @@ -257,7 +192,7 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Security', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', @@ -305,7 +240,7 @@ const sidebars: SidebarsConfig = { type: 
'category', label: 'Users & Roles', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', @@ -326,115 +261,160 @@ const sidebars: SidebarsConfig = { }, { type: 'category', - label: 'Database', + label: 'CLI', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'database/overview', + id: 'cli/overview', label: 'Overview', }, { type: 'doc', - id: 'database/schema', - label: 'Schema', - }, - { - type: 'doc', - id: 'database/api', - label: 'API', + id: 'cli/commands', + label: 'Commands', }, { type: 'doc', - id: 'database/data-loader', - label: 'Data Loader', + id: 'cli/operations-api-commands', + label: 'Operations API Commands', }, { type: 'doc', - id: 'database/storage-algorithm', - label: 'Storage Algorithm', + id: 'cli/authentication', + label: 'Authentication', }, + ], + }, + { + type: 'category', + label: 'Configuration', + collapsible: false, + className: 'reference-category-header', + items: [ { type: 'doc', - id: 'database/jobs', - label: 'Jobs', + id: 'configuration/overview', + label: 'Overview', }, { type: 'doc', - id: 'database/system-tables', - label: 'System Tables', + id: 'configuration/options', + label: 'Options', }, { type: 'doc', - id: 'database/compaction', - label: 'Compaction', + id: 'configuration/operations', + label: 'Operations', }, + ], + }, + { + type: 'category', + label: 'Operations API', + collapsible: false, + className: 'reference-category-header', + items: [ { type: 'doc', - id: 'database/transaction', - label: 'Transaction Logging', + id: 'operations-api/overview', + label: 'Overview', }, { type: 'doc', - id: 'database/sql', - label: 'SQL', + id: 'operations-api/operations', + label: 'Operations', }, ], }, { type: 'category', - label: 'Resources', + label: 'Logging', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 
'doc', - id: 'resources/overview', + id: 'logging/overview', label: 'Overview', }, { type: 'doc', - id: 'resources/resource-api', - label: 'Resource API', + id: 'logging/configuration', + label: 'Configuration', }, { type: 'doc', - id: 'resources/query-optimization', - label: 'Query Optimization', + id: 'logging/api', + label: 'API', + }, + { + type: 'doc', + id: 'logging/operations', + label: 'Operations', }, ], }, { type: 'category', - label: 'Components', + label: 'Analytics', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'components/overview', + id: 'analytics/overview', label: 'Overview', }, { type: 'doc', - id: 'components/applications', - label: 'Applications', + id: 'analytics/operations', + label: 'Operations', }, + ], + }, + { + type: 'category', + label: 'MQTT', + collapsible: false, + className: 'reference-category-header', + items: [ { type: 'doc', - id: 'components/extension-api', - label: 'Extension API', + id: 'mqtt/overview', + label: 'Overview', }, { type: 'doc', - id: 'components/plugin-api', - label: 'Plugin API', + id: 'mqtt/configuration', + label: 'Configuration', }, + ], + }, + { + type: 'category', + label: 'Static Files', + collapsible: false, + className: 'reference-category-header', + items: [ { type: 'doc', - id: 'components/javascript-environment', - label: 'JavaScript Environment', + id: 'static-files/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Environment Variables', + collapsible: false, + className: 'reference-category-header', + items: [ + { + type: 'doc', + id: 'environment-variables/overview', + label: 'Overview', }, ], }, @@ -442,7 +422,7 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Replication', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', @@ -463,19 +443,40 @@ const sidebars: SidebarsConfig = { }, { type: 
'category', - label: 'Operations API', + label: 'GraphQL Querying', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', - id: 'operations-api/overview', + id: 'graphql-querying/overview', label: 'Overview', }, + ], + }, + { + type: 'category', + label: 'Studio', + collapsible: false, + className: 'reference-category-header', + items: [ { type: 'doc', - id: 'operations-api/operations', - label: 'Operations', + id: 'studio/overview', + label: 'Overview', + }, + ], + }, + { + type: 'category', + label: 'Fastify Routes', + collapsible: false, + className: 'reference-category-header', + items: [ + { + type: 'doc', + id: 'fastify-routes/overview', + label: 'Overview', }, ], }, @@ -483,7 +484,7 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Legacy', collapsible: false, - className: 'learn-category-header', + className: 'reference-category-header', items: [ { type: 'doc', From 8f740b747ac9a40977496835d5df7e5b1bf4f0e7 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 08:59:02 -0600 Subject: [PATCH 42/51] add index pages --- redirects.ts | 1 - reference/index.md | 58 +++++++++++++++++++- reference_versioned_docs/version-v4/index.md | 58 +++++++++++++++++++- src/components/CustomDocCardList.module.css | 19 +++++++ src/components/CustomDocCardList.tsx | 1 + src/pages/index.mdx | 47 +++++++--------- src/pages/reference/index.tsx | 4 +- 7 files changed, 153 insertions(+), 35 deletions(-) diff --git a/redirects.ts b/redirects.ts index 5b387cf8..62f1fbdb 100644 --- a/redirects.ts +++ b/redirects.ts @@ -23,7 +23,6 @@ type RedirectRule = { // ─── Static redirect rules ──────────────────────────────────────────────────── // All paths sourced from GA pageview data (Oct 2025–Feb 2026). // Non-versioned /docs/* paths → /reference/v5/ (current version) -// Paths with <10 views are marked LOW TRAFFIC. 
const currentRedirects: RedirectRule[] = [ // ── Docs root ────────────────────────────────────────────────────────────── diff --git a/reference/index.md b/reference/index.md index 976c77b0..f4911b98 100644 --- a/reference/index.md +++ b/reference/index.md @@ -1,3 +1,57 @@ -# future v5 docs +--- +title: Reference +--- -replace with final product of /reference_versioned_docs/version-v4/ +# Harper v5 Reference + +Complete technical reference for Harper v5. Each section covers a core feature or subsystem — configuration options, APIs, and operational details. + +For concept introductions, tutorials, and guides, see the [Learn](/learn) section. + +## Sections + +### Data & Application + +| Section | Description | +|---|---| +| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | +| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | +| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | + +### Access & Security + +| Section | Description | +|---|---| +| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | +| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | +| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | +| [Users & Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | + +### Setup & Operation + +| Section | Description | +|---|---| +| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | +| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | +| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | + +### Features + +| Section | Description | +|---|---| +| 
[Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | +| [Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | +| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | +| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | +| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the `loadEnv` plugin | +| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | +| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | +| [Studio](./studio/overview.md) | Local Studio UI configuration and access | +| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | + +### Legacy + +| Section | Description | +|---|---| +| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | +| [Custom Functions](./legacy/custom-functions.md) | Deprecated predecessor to Components | diff --git a/reference_versioned_docs/version-v4/index.md b/reference_versioned_docs/version-v4/index.md index fbeeb997..a36de1e0 100644 --- a/reference_versioned_docs/version-v4/index.md +++ b/reference_versioned_docs/version-v4/index.md @@ -1,3 +1,57 @@ -# v4 +--- +title: Reference +--- -replace with new reference docs content +# Harper v4 Reference + +Complete technical reference for Harper v4. Each section covers a core feature or subsystem — configuration options, APIs, and operational details. + +For concept introductions, tutorials, and guides, see the [Learn](/learn) section. 
+ +## Sections + +### Data & Application + +| Section | Description | +|---|---| +| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | +| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | +| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | + +### Access & Security + +| Section | Description | +|---|---| +| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | +| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | +| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | +| [Users & Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | + +### Setup & Operation + +| Section | Description | +|---|---| +| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | +| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | +| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | + +### Features + +| Section | Description | +|---|---| +| [Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | +| [Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | +| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | +| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | +| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the `loadEnv` plugin | +| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | +| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | +| 
[Studio](./studio/overview.md) | Local Studio UI configuration and access | +| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | + +### Legacy + +| Section | Description | +|---|---| +| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | +| [Custom Functions](./legacy/custom-functions.md) | Deprecated predecessor to Components | diff --git a/src/components/CustomDocCardList.module.css b/src/components/CustomDocCardList.module.css index 349a65b7..66dfad52 100644 --- a/src/components/CustomDocCardList.module.css +++ b/src/components/CustomDocCardList.module.css @@ -34,6 +34,25 @@ margin-bottom: 0; } +.cardBadge { + display: inline-block; + align-self: flex-start; + margin-bottom: 0.5rem; + padding: 0.2em 0.6em; + font-size: 0.6rem; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.06em; + border-radius: 999px; + background-color: var(--ifm-color-primary); + color: #fff; +} + +[data-theme='dark'] .cardBadge { + background-color: var(--ifm-color-primary); + color: #1a1a2e; +} + /* Responsive breakpoints */ @media (max-width: 996px) { .cardGrid { diff --git a/src/components/CustomDocCardList.tsx b/src/components/CustomDocCardList.tsx index c260724f..99752757 100644 --- a/src/components/CustomDocCardList.tsx +++ b/src/components/CustomDocCardList.tsx @@ -14,6 +14,7 @@ interface CustomDocCardListProps { function CustomDocCard({ item }: { item: any }) { return ( + {item.badge && {item.badge}} {item.label} diff --git a/src/pages/index.mdx b/src/pages/index.mdx index b18ffad3..862c4e5a 100644 --- a/src/pages/index.mdx +++ b/src/pages/index.mdx @@ -1,55 +1,46 @@ --- -title: Welcome to Harper Documentation Site +title: Harper Documentation --- import CustomDocCardList from '@site/src/components/CustomDocCardList'; # Harper Docs -:::info +Harper is an all-in-one backend that fuses a database, caching, application hosting, and messaging 
into a single system — eliminating the serialization overhead and network latency of traditional multi-service architectures. -### Get the Most Out of Harper +:::info Join the Community -Join our Discord to access expert support, collaborate with Harper's core team, and stay up to date on the latest platform updates. [Join Our Discord →](https://harper.fast/discord) -::: - -Harper is an all-in-one backend technology that fuses database technologies, caching, application hosting, and messaging functions into a single system. Unlike traditional architectures where each piece runs independently and incurs extra costs and latency from serialization and network operations between processes, Harper systems can handle workloads seamlessly and efficiently. - -Here, you'll find all things Harper, and everything you need to get started, troubleshoot issues, and make the most of our platform. +Get help from Harper's core team, collaborate with other developers, and stay current on platform updates. [Join our Discord →](https://harper.fast/discord) -## Getting Started - -The best way to get started using Harper is to head over to the [Learn](/learn/) section and work through the Getting Started and Developer guides. 
- -## Building with Harper +::: diff --git a/src/pages/reference/index.tsx b/src/pages/reference/index.tsx index acd319d9..4d6dcf30 100644 --- a/src/pages/reference/index.tsx +++ b/src/pages/reference/index.tsx @@ -5,8 +5,8 @@ export default function ReferenceRedirect() { const history = useHistory(); useEffect(() => { - // Redirect to the v4 reference docs - history.replace('/reference/v4'); + // Redirect to the latest reference docs + history.replace('/reference/v5'); }, [history]); return null; From 9aee72d714cb458c7622f4b9f8bfa9f5cf67251a Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 09:00:55 -0600 Subject: [PATCH 43/51] format --- reference/index.md | 62 ++++++++++---------- reference_versioned_docs/version-v4/index.md | 62 ++++++++++---------- src/pages/index.mdx | 2 +- 3 files changed, 63 insertions(+), 63 deletions(-) diff --git a/reference/index.md b/reference/index.md index f4911b98..8617b40a 100644 --- a/reference/index.md +++ b/reference/index.md @@ -12,46 +12,46 @@ For concept introductions, tutorials, and guides, see the [Learn](/learn) sectio ### Data & Application -| Section | Description | -|---|---| -| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | -| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | -| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | +| Section | Description | +| -------------------------------------- | ----------------------------------------------------------------------- | +| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | +| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | +| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | ### Access & Security -| 
Section | Description | -|---|---| -| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | -| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | -| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | -| [Users & Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | +| Section | Description | +| ---------------------------------------------- | -------------------------------------------------------------------------- | +| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | +| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | +| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | +| [Users & Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | ### Setup & Operation -| Section | Description | -|---|---| -| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | -| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | -| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | +| Section | Description | +| ---------------------------------------------- | ------------------------------------------------------------- | +| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | +| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | +| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | ### Features -| Section | Description | -|---|---| -| [Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | -| 
[Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | -| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | -| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | -| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the `loadEnv` plugin | -| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | -| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | -| [Studio](./studio/overview.md) | Local Studio UI configuration and access | -| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | +| Section | Description | +| ------------------------------------------------------------ | ------------------------------------------------------------------ | +| [Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | +| [Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | +| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | +| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | +| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the `loadEnv` plugin | +| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | +| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | +| [Studio](./studio/overview.md) | Local Studio UI configuration and access | +| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | ### Legacy -| Section | Description | -|---|---| -| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | -| [Custom Functions](./legacy/custom-functions.md) | Deprecated 
predecessor to Components | +| Section | Description | +| ------------------------------------------------ | ------------------------------------------------------------------------ | +| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | +| [Custom Functions](./legacy/custom-functions.md) | Deprecated predecessor to Components | diff --git a/reference_versioned_docs/version-v4/index.md b/reference_versioned_docs/version-v4/index.md index a36de1e0..28b5afbc 100644 --- a/reference_versioned_docs/version-v4/index.md +++ b/reference_versioned_docs/version-v4/index.md @@ -12,46 +12,46 @@ For concept introductions, tutorials, and guides, see the [Learn](/learn) sectio ### Data & Application -| Section | Description | -|---|---| -| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | -| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | -| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | +| Section | Description | +| -------------------------------------- | ----------------------------------------------------------------------- | +| [Database](./database/overview.md) | Schema system, storage, indexing, transactions, and the database JS API | +| [Resources](./resources/overview.md) | Custom resource classes, the Resource API, and query optimization | +| [Components](./components/overview.md) | Applications, extensions, the Plugin API, and the JS environment | ### Access & Security -| Section | Description | -|---|---| -| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | -| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | -| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | -| [Users & 
Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | +| Section | Description | +| ---------------------------------------------- | -------------------------------------------------------------------------- | +| [REST](./rest/overview.md) | Auto-REST interface, querying, content types, headers, WebSockets, and SSE | +| [HTTP](./http/overview.md) | HTTP server configuration, TLS, and the `server` API | +| [Security](./security/overview.md) | Authentication mechanisms, certificates, and CORS/SSL configuration | +| [Users & Roles](./users-and-roles/overview.md) | RBAC, roles configuration, and user management operations | ### Setup & Operation -| Section | Description | -|---|---| -| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | -| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | -| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | +| Section | Description | +| ---------------------------------------------- | ------------------------------------------------------------- | +| [CLI](./cli/overview.md) | All CLI commands, Operations API commands, and authentication | +| [Configuration](./configuration/overview.md) | `harperdb-config.yaml` options and configuration operations | +| [Operations API](./operations-api/overview.md) | Full index of all Operations API operations | ### Features -| Section | Description | -|---|---| -| [Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | -| [Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | -| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | -| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | -| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the 
`loadEnv` plugin | -| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | -| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | -| [Studio](./studio/overview.md) | Local Studio UI configuration and access | -| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | +| Section | Description | +| ------------------------------------------------------------ | ------------------------------------------------------------------ | +| [Logging](./logging/overview.md) | Log configuration, the `logger` API, and log management operations | +| [Analytics](./analytics/overview.md) | Resource and storage analytics, system tables | +| [MQTT](./mqtt/overview.md) | MQTT broker configuration and usage | +| [Static Files](./static-files/overview.md) | Static file serving via the `static` plugin | +| [Environment Variables](./environment-variables/overview.md) | Environment variable loading via the `loadEnv` plugin | +| [Replication](./replication/overview.md) | Native replication, clustering, and sharding | +| [GraphQL Querying](./graphql-querying/overview.md) | Experimental GraphQL support | +| [Studio](./studio/overview.md) | Local Studio UI configuration and access | +| [Fastify Routes](./fastify-routes/overview.md) | Fastify route definitions (discouraged in favor of components) | ### Legacy -| Section | Description | -|---|---| -| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | -| [Custom Functions](./legacy/custom-functions.md) | Deprecated predecessor to Components | +| Section | Description | +| ------------------------------------------------ | ------------------------------------------------------------------------ | +| [Harper Cloud](./legacy/cloud.md) | Legacy Harper Cloud documentation — see Fabric for current cloud hosting | +| [Custom Functions](./legacy/custom-functions.md) | 
Deprecated predecessor to Components | diff --git a/src/pages/index.mdx b/src/pages/index.mdx index 862c4e5a..8b5f91a6 100644 --- a/src/pages/index.mdx +++ b/src/pages/index.mdx @@ -40,7 +40,7 @@ Get help from Harper's core team, collaborate with other developers, and stay cu type: 'link', href: '/fabric', label: 'Fabric', - description: 'Harper\'s managed cloud platform — deploy, scale, and operate Harper without the infrastructure', + description: "Harper's managed cloud platform — deploy, scale, and operate Harper without the infrastructure", }, ]} /> From 21b72c9348022444efa6a5566715c3e77e087472 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 09:07:27 -0600 Subject: [PATCH 44/51] start on some new dev docs --- CONTRIBUTING.md | 158 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 152 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1bdfd7c2..f5cb1b22 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,17 +32,163 @@ npm run format ## Site Organization -This site is powered by Docusaurus and leverages the file-system based versioning capabilities of the framework. +This site is powered by Docusaurus. The documentation is split into four distinct sections, each serving a different purpose and configured as its own Docusaurus plugin instance. -There are two directories where actual documentation content lives. +### The Four Sections -The first, `docs/` contains the "latest" or "next" version of the documentation. We do not publish or render this directory, and the content here is meant to represent on-going development. +| Section | URL | Purpose | +| ----------------- | ---------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| **Learn** | `/learn` | Guides, tutorials, and conceptual introductions. How-to content that walks users through a goal. 
| +| **Reference** | `/reference/vN` | Complete technical reference for every feature, API, configuration option, and operation. Versioned by major Harper version. | +| **Release Notes** | `/release-notes` | Changelog for every Harper release. Organized by major version codename (e.g. `v4-tucker`). | +| **Fabric** | `/fabric` | Documentation for Harper's managed cloud platform. Separate from the core Harper product docs. | -The second, `versioned_docs` contains all of the specific Harper version documentation organized by minor version. The latest version within this directory maps to the default path on the site. For example, if the latest version is `versioned_docs/version-4.6/` then the page https://docs.harperdb.io/docs/getting-started/first-harper-app maps to the file `site/versioned_docs/version-4.6/getting-started/first-harper-app.md`. And for the previous 4.5 version the page http://localhost:3000/docs/4.5/getting-started/first-harper-app can be found at `site/versioned_docs/version-4.5/getting-started/first-harper-app.md`. +**Rule of thumb**: if it explains _how something works_ or _what something does_, it belongs in Reference. If it explains _how to accomplish something_, it belongs in Learn. New feature documentation always goes in Reference first; Learn guides can link to it. -Depending on the specific change, you may need to make updates to similar files across multiple version directories as well as the root `docs/`. +### Section Configuration Map -The site organization is ever evolving so make sure to revisit this file over time to stay up to date with the latest structure. +Each section maps to specific source directories and config files: + +#### Learn + +| Item | Location | +| ------------- | ------------------------------------------ | +| Content | `learn/` | +| Sidebar | `sidebarsLearn.ts` | +| Plugin config | `docusaurus.config.ts` → plugin id `learn` | + +Learn is non-versioned. 
All content lives directly in `learn/` organized into categories that match the sidebar. + +#### Reference + +| Item | Location | +| --------------------- | ------------------------------------------------------- | +| Current (v5) content | `reference/` | +| Archived (v4) content | `reference_versioned_docs/version-v4/` | +| Current sidebar | `sidebarsReference.ts` | +| v4 sidebar | `reference_versioned_sidebars/version-v4-sidebars.json` | +| Version list | `reference_versions.json` | +| Plugin config | `docusaurus.config.ts` → plugin id `reference` | + +Reference is versioned by major Harper version. The `reference_versions.json` file lists all archived versions — currently `["current", "v4"]`. The `current` version maps to `v5` (the in-progress next major) and is not published; `v4` is the default displayed version. + +To cut a new version snapshot (e.g. when v5 ships), run: + +```bash +node scripts/cut-version.js +``` + +#### Release Notes + +| Item | Location | +| ------------- | -------------------------------------------------- | +| Content | `release-notes/` | +| Sidebar | `sidebarsReleaseNotes.ts` | +| Plugin config | `docusaurus.config.ts` → plugin id `release-notes` | + +Release notes are non-versioned in the Docusaurus sense — major version organization is handled manually via subdirectories (`v4-tucker/`, `v3-monkey/`, etc.). The sidebar uses `autogenerated` directives so new files are picked up automatically. See the [Release Notes Process](#release-notes-process) section for the full workflow. + +#### Fabric + +| Item | Location | +| ------------- | ------------------------------------------- | +| Content | `fabric/` | +| Sidebar | `sidebarsFabric.ts` | +| Plugin config | `docusaurus.config.ts` → plugin id `fabric` | + +Fabric is non-versioned. It documents the managed cloud platform independently of the Harper core product. 
+ +--- + +### Reference Section Structure + +The Reference section is organized as a flat list of feature-based sections — no deep nesting. Each top-level section corresponds to one Harper feature or subsystem. + +#### Section Layout + +Every section follows this pattern: + +``` +reference/ +└── {feature}/ + ├── overview.md # General introduction, architecture, concepts + ├── configuration.md # Config options specific to this feature (if applicable) + ├── api.md # JS/programmatic API reference (if applicable) + └── operations.md # Operations API operations for this feature (if applicable) +``` + +Not every section needs all four files — some features only warrant an `overview.md`. The filenames above are conventions, not requirements. + +#### Section Order + +Sections are ordered in the sidebar by who needs them first: + +1. **Data & Application** — Database, Resources, Components +2. **Access & Security** — REST, HTTP, Security, Users & Roles +3. **Setup & Operation** — CLI, Configuration, Operations API +4. **Features** — Logging, Analytics, MQTT, Static Files, Environment Variables, Replication, GraphQL Querying, Studio, Fastify Routes +5. **Legacy** — Deprecated or discouraged features + +#### Sidebar Headers + +Reference sidebar headers use `className: "reference-category-header"` for compact styling. This is set on each category entry in the sidebar config. Do not use `learn-category-header` in reference sidebars. + +#### Legacy Section + +Deprecated or discouraged features belong in `reference/legacy/` (current) or `reference_versioned_docs/version-v4/legacy/` (v4). Each legacy page should briefly explain what the feature was and direct users to the modern alternative. + +--- + +### Version Annotations + +Because the Reference section consolidates all minor versions of a major into one document, features are annotated inline to indicate when they were introduced or changed. 
Follow the Node.js documentation convention: + +**New feature:** + +```markdown +## Relationships + +Added in: v4.3.0 + +The `@relation` directive allows you to define relationships between tables... +``` + +**Changed behavior:** + +```markdown +### Default Port + +Changed in: v4.5.0 + +The default MQTT port changed from 9925 to 9933. +In previous versions of v4, the default was 9925. +``` + +**Deprecated feature:** + +```markdown +## SQL Querying + +Deprecated in: v4.2.0 + +SQL is still supported but discouraged. See [Database](../database/overview.md) for modern alternatives. +``` + +**Configuration option:** + +```markdown +### `logger.level` + +- Type: `string` +- Default: `"info"` +- Added in: v4.1.0 +``` + +If the introduction version is inferred from version comparison rather than confirmed by release notes, note it: + +```markdown +Added in: v4.3.0 (inferred from version comparison, needs verification) +``` ## Known Issues From e32a2bf9dcd3e26c7147d4507d2535482e8f644c Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 09:27:22 -0600 Subject: [PATCH 45/51] add version badge and update docs --- reference/analytics/overview.md | 2 +- reference/cli/commands.md | 6 +-- reference/cli/operations-api-commands.md | 2 +- reference/cli/overview.md | 8 ++-- reference/components/applications.md | 2 +- .../components/javascript-environment.md | 2 +- reference/components/overview.md | 8 ++-- reference/components/plugin-api.md | 2 +- reference/configuration/overview.md | 2 +- reference/database/api.md | 2 +- reference/database/compaction.md | 2 +- reference/database/data-loader.md | 2 +- reference/database/jobs.md | 4 +- reference/database/schema.md | 12 +++--- reference/database/storage-algorithm.md | 4 +- reference/database/system-tables.md | 4 +- reference/database/transaction.md | 4 +- reference/graphql-querying/overview.md | 4 +- reference/http/api.md | 2 +- reference/http/configuration.md | 14 +++---- reference/http/overview.md | 6 +-- 
reference/logging/configuration.md | 6 +-- reference/logging/overview.md | 4 +- reference/mqtt/configuration.md | 2 +- reference/mqtt/overview.md | 4 +- reference/operations-api/operations.md | 2 +- reference/replication/clustering.md | 2 +- reference/replication/overview.md | 4 +- reference/replication/sharding.md | 4 +- reference/resources/overview.md | 2 +- reference/resources/query-optimization.md | 4 +- reference/resources/resource-api.md | 20 +++++----- reference/rest/overview.md | 8 ++-- reference/rest/querying.md | 10 ++--- reference/rest/server-sent-events.md | 2 +- reference/rest/websockets.md | 2 +- reference/security/certificate-management.md | 4 +- .../security/certificate-verification.md | 4 +- reference/security/configuration.md | 4 +- reference/security/mtls-authentication.md | 2 +- reference/static-files/overview.md | 6 +-- reference/users-and-roles/configuration.md | 2 +- .../version-v4/analytics/overview.md | 2 +- .../version-v4/cli/commands.md | 6 +-- .../version-v4/cli/operations-api-commands.md | 2 +- .../version-v4/cli/overview.md | 8 ++-- .../version-v4/components/applications.md | 2 +- .../components/javascript-environment.md | 2 +- .../version-v4/components/overview.md | 8 ++-- .../version-v4/components/plugin-api.md | 2 +- .../version-v4/configuration/overview.md | 2 +- .../version-v4/database/api.md | 2 +- .../version-v4/database/compaction.md | 2 +- .../version-v4/database/data-loader.md | 2 +- .../version-v4/database/jobs.md | 4 +- .../version-v4/database/schema.md | 12 +++--- .../version-v4/database/storage-algorithm.md | 4 +- .../version-v4/database/system-tables.md | 4 +- .../version-v4/database/transaction.md | 4 +- .../version-v4/graphql-querying/overview.md | 4 +- .../version-v4/http/api.md | 2 +- .../version-v4/http/configuration.md | 14 +++---- .../version-v4/http/overview.md | 6 +-- .../version-v4/logging/configuration.md | 6 +-- .../version-v4/logging/overview.md | 4 +- .../version-v4/mqtt/configuration.md | 2 +- 
.../version-v4/mqtt/overview.md | 4 +- .../version-v4/operations-api/operations.md | 2 +- .../version-v4/replication/clustering.md | 2 +- .../version-v4/replication/overview.md | 4 +- .../version-v4/replication/sharding.md | 4 +- .../version-v4/resources/overview.md | 2 +- .../resources/query-optimization.md | 4 +- .../version-v4/resources/resource-api.md | 20 +++++----- .../version-v4/rest/overview.md | 8 ++-- .../version-v4/rest/querying.md | 10 ++--- .../version-v4/rest/server-sent-events.md | 2 +- .../version-v4/rest/websockets.md | 2 +- .../security/certificate-management.md | 4 +- .../security/certificate-verification.md | 4 +- .../version-v4/security/configuration.md | 4 +- .../security/mtls-authentication.md | 2 +- .../version-v4/static-files/overview.md | 6 +-- .../users-and-roles/configuration.md | 2 +- scripts/replace-version-annotations.js | 40 +++++++++++++++++++ src/components/VersionBadge.module.css | 7 ++++ src/components/VersionBadge.tsx | 23 +++++++++++ src/theme/MDXComponents.tsx | 7 ++++ 88 files changed, 269 insertions(+), 192 deletions(-) create mode 100644 scripts/replace-version-annotations.js create mode 100644 src/components/VersionBadge.module.css create mode 100644 src/components/VersionBadge.tsx create mode 100644 src/theme/MDXComponents.tsx diff --git a/reference/analytics/overview.md b/reference/analytics/overview.md index 679e70f4..185c5df0 100644 --- a/reference/analytics/overview.md +++ b/reference/analytics/overview.md @@ -7,7 +7,7 @@ title: Analytics -Added in: v4.5.0 (resource and storage analytics) + (resource and storage analytics) Harper collects real-time telemetry and statistics across all operations, URL endpoints, and messaging topics. This data can be used to monitor server health, understand traffic and usage patterns, identify resource-intensive queries, and inform scaling decisions. 
diff --git a/reference/cli/commands.md b/reference/cli/commands.md index 3f1fdc94..b7e45f29 100644 --- a/reference/cli/commands.md +++ b/reference/cli/commands.md @@ -15,7 +15,7 @@ This page documents the core Harper CLI commands for managing Harper instances. ### `harper` -Added in: v4.1.0 + Run Harper in the foreground as a standard process. This is the recommended way to run Harper. @@ -66,7 +66,7 @@ For more information on installation, see [Getting Started / Install and Connect ### `harper run` -Added in: v4.2.0 + Run a Harper application from any location as a foreground, standard process (similar to `harper`). @@ -78,7 +78,7 @@ This command runs Harper with the specified application directory without automa ### `harper dev` -Added in: v4.2.0 + Run Harper in development mode from a specified directory with automatic reloading. Recommended for local application development. Operates similar to `harper` and `harper run`. diff --git a/reference/cli/operations-api-commands.md b/reference/cli/operations-api-commands.md index bdfd49f5..bdf01d6b 100644 --- a/reference/cli/operations-api-commands.md +++ b/reference/cli/operations-api-commands.md @@ -7,7 +7,7 @@ title: Operations API Commands # Operations API Commands -Added in: v4.3.0 + The Harper CLI supports executing operations from the [Operations API](../operations-api/overview.md) directly from the command line. This enables powerful automation and scripting capabilities. diff --git a/reference/cli/overview.md b/reference/cli/overview.md index 5571becc..73bbd224 100644 --- a/reference/cli/overview.md +++ b/reference/cli/overview.md @@ -28,7 +28,7 @@ For detailed installation instructions, see the [Getting Started / Install And C ## Command Name -Changed in: v4.7.0 + The CLI command is `harper`. From v4.1.0 to v4.6.x, the command was only available as `harperdb`. Starting in v4.7.0, the preferred command is `harper`, though `harperdb` continues to work as an alias for backward compatibility. 
@@ -113,7 +113,7 @@ See [CLI Commands](./commands.md) for detailed documentation on each command. ## Operations API Commands -Added in: v4.3.0 + The Harper CLI supports executing most operations from the [Operations API](../operations-api/overview.md) directly from the command line. This includes operations that do not require complex nested parameters. @@ -147,7 +147,7 @@ See [Operations API Commands](./operations-api-commands.md) for the complete lis ## Remote Operations -Changed in: v4.3.0 (expanded remote operations support) + (expanded remote operations support) The CLI can execute operations on remote Harper instances by passing the `target` parameter with the HTTP address of the remote instance. @@ -174,7 +174,7 @@ harper describe_database database=dev target=https://server.com:9925 username=HD ## Development Mode -Added in: v4.2.0 + For local application and component development, use `harper dev`: diff --git a/reference/components/applications.md b/reference/components/applications.md index 5df64215..7c7a12c9 100644 --- a/reference/components/applications.md +++ b/reference/components/applications.md @@ -18,7 +18,7 @@ Harper offers several approaches to managing applications that differ between lo ### `dev` and `run` Commands -Added in: v4.2.0 + The quickest way to run an application locally is with the `dev` command inside the application directory: diff --git a/reference/components/javascript-environment.md b/reference/components/javascript-environment.md index cd6f75b1..55447cc9 100644 --- a/reference/components/javascript-environment.md +++ b/reference/components/javascript-environment.md @@ -52,7 +52,7 @@ See [Transactions](../database/transaction.md) for full reference. ### `createBlob(data, options?)` -Added in: v4.5.0 + Creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, etc.) in `Blob`-typed schema fields. 
diff --git a/reference/components/overview.md b/reference/components/overview.md index d41b7475..aa5f7630 100644 --- a/reference/components/overview.md +++ b/reference/components/overview.md @@ -18,13 +18,13 @@ title: Components ### Applications -Added in: v4.2.0 + **Applications** implement specific user-facing features or functionality. Applications are built on top of extensions and represent the end product that users interact with. For example, a Next.js application serving a web interface or an Apollo GraphQL server providing a GraphQL API are both applications. Also, a collection of Harper Schemas and/or custom Resources is also an application. ### Extensions -Added in: v4.2.0 + **Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality they implement. For example, the built-in `graphqlSchema` extension enables applications to define databases and tables using GraphQL schemas. The `@harperdb/nextjs` and `@harperdb/apollo` extensions provide building blocks for Next.js and Apollo applications respectively. @@ -32,7 +32,7 @@ Extensions can also depend on other extensions. For example, `@harperdb/apollo` ### Plugins (Experimental) -Added in: v4.6.0 (experimental) + (experimental) **Plugins** are a new iteration of the extension system introduced in v4.6. They are simultaneously a simplification and extensibility upgrade over extensions. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only export a single `handleApplication` method. @@ -149,7 +149,7 @@ Extensions require an `extensionModule` option pointing to the extension source. ## Component Status Monitoring -Added in: v4.7.0 + Harper collects status from each component at load time and tracks any registered status change notifications. This provides visibility into the health and state of running components. 
diff --git a/reference/components/plugin-api.md b/reference/components/plugin-api.md index c92cf839..69413101 100644 --- a/reference/components/plugin-api.md +++ b/reference/components/plugin-api.md @@ -8,7 +8,7 @@ title: Plugin API # Plugin API -Added in: v4.6.0 (experimental) + (experimental) > The Plugin API is **experimental**. It is the recommended approach for building new extensions, and is intended to replace the [Extension API](./extension-api.md) in the future. Both systems are supported simultaneously. diff --git a/reference/configuration/overview.md b/reference/configuration/overview.md index 747e681c..4604afec 100644 --- a/reference/configuration/overview.md +++ b/reference/configuration/overview.md @@ -94,7 +94,7 @@ HDB_CONFIG=/existing/rootpath/harperdb-config.yaml harperdb ## Environment Variable-Based Configuration -Added in: v4.7.2 + Harper provides two special environment variables for managing configuration across deployments: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. Both accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`. diff --git a/reference/database/api.md b/reference/database/api.md index bb8c225d..11bfbe70 100644 --- a/reference/database/api.md +++ b/reference/database/api.md @@ -136,7 +136,7 @@ For deeper background on Harper's transaction model, see [Storage Algorithm](./s ## `createBlob(data, options?)` -Added in: v4.5.0 + `createBlob` creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, large HTML, etc.) in a `Blob`-typed schema field. diff --git a/reference/database/compaction.md b/reference/database/compaction.md index a4ede5f2..faa9946d 100644 --- a/reference/database/compaction.md +++ b/reference/database/compaction.md @@ -7,7 +7,7 @@ title: Compaction # Compaction -Added in: v4.3.0 + Database files grow over time as records are inserted, updated, and deleted. 
Deleted records and updated values leave behind free space (fragmentation) in the database file, which can increase file size and potentially affect performance. Compaction eliminates this free space, creating a smaller, contiguous database file. diff --git a/reference/database/data-loader.md b/reference/database/data-loader.md index 962b3706..722eb2fa 100644 --- a/reference/database/data-loader.md +++ b/reference/database/data-loader.md @@ -7,7 +7,7 @@ title: Data Loader # Data Loader -Added in: v4.6.0 + The Data Loader is a built-in component that loads data from JSON or YAML files into Harper tables as part of component deployment. It is designed for seeding tables with initial records — configuration data, reference data, default users, or other records that should exist when a component is first deployed or updated. diff --git a/reference/database/jobs.md b/reference/database/jobs.md index 63fd8e0e..7ce15189 100644 --- a/reference/database/jobs.md +++ b/reference/database/jobs.md @@ -138,7 +138,7 @@ Exports table data to a local file in JSON or CSV format. - `path` _(required)_ — local directory path where the export file will be written - `search_operation` _(required)_ — query to select records: `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` -Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation for exports + — `search_by_conditions` added as a supported search operation for exports - `filename` _(optional)_ — filename without extension; auto-generated from epoch timestamp if omitted @@ -160,7 +160,7 @@ Changed in: v4.3.0 — `search_by_conditions` added as a supported search operat Exports table data to an AWS S3 bucket in JSON or CSV format. 
-Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation + — `search_by_conditions` added as a supported search operation - `operation` _(required)_ — `export_to_s3` - `format` _(required)_ — `json` or `csv` diff --git a/reference/database/schema.md b/reference/database/schema.md index 905aa1f9..b29f5d59 100644 --- a/reference/database/schema.md +++ b/reference/database/schema.md @@ -18,7 +18,7 @@ Harper uses GraphQL Schema Definition Language (SDL) to declaratively define tab ## Overview -Added in: v4.2.0 + Schemas are defined using standard [GraphQL type definitions](https://graphql.org/learn/schema/) with Harper-specific directives. A schema definition: @@ -157,7 +157,7 @@ If no primary key is provided on insert, Harper auto-generates one: - **UUID string** — when type is `String` or `ID` - **Auto-incrementing integer** — when type is `Int`, `Long`, or `Any` -Changed in: v4.4.0 + Auto-incrementing integer primary keys were added. Previously only UUID generation was supported for `ID` and `String` types. @@ -203,7 +203,7 @@ type Event @table { ## Relationships -Added in: v4.3.0 + The `@relationship` directive defines how one table relates to another through a foreign key. Relationships enable join queries and allow related records to be selected as nested properties in query results. @@ -277,7 +277,7 @@ Schemas can also define self-referential relationships, enabling parent-child hi ## Computed Properties -Added in: v4.4.0 + The `@computed` directive marks a field as derived from other fields at query time. Computed properties are not stored in the database but are evaluated when the field is accessed. @@ -334,7 +334,7 @@ Increment `version` whenever the computation function changes. Failing to do so ## Vector Indexing -Added in: v4.6.0 + Use `@indexed(type: "HNSW")` to create a vector index using the Hierarchical Navigable Small World algorithm, designed for fast approximate nearest-neighbor search on high-dimensional vectors. 
@@ -410,7 +410,7 @@ Arrays of a type are expressed with `[Type]` syntax (e.g., `[Float]` for a vecto ### Blob Type -Added in: v4.5.0 + `Blob` fields are designed for large binary content. Harper's `Blob` type implements the [Web API `Blob` interface](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so all standard `Blob` methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`) are available. Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to be held entirely in memory. Use `Blob` for content typically larger than 20KB (images, video, audio, large HTML, etc.). diff --git a/reference/database/storage-algorithm.md b/reference/database/storage-algorithm.md index 35af971b..3a3e0087 100644 --- a/reference/database/storage-algorithm.md +++ b/reference/database/storage-algorithm.md @@ -26,7 +26,7 @@ Each Harper table has a single writer process, eliminating deadlocks and ensurin ## Universally Indexed -Changed in: v4.3.0 — Storage performance improvements including better free-space management + — Storage performance improvements including better free-space management For [dynamic schema tables](./overview.md#dynamic-vs-defined-schemas), all top-level attributes are automatically indexed immediately upon ingestion — Harper reflexively creates the attribute and its index as new data arrives. For [schema-defined tables](./schema.md), indexes are created for all attributes marked with `@indexed`. @@ -42,7 +42,7 @@ Within the LMDB implementation, table records are grouped into a single LMDB env ## Compression -Changed in: v4.3.0 — Compression is now enabled by default for all records over 4KB + — Compression is now enabled by default for all records over 4KB Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](../configuration/options.md). 
Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). diff --git a/reference/database/system-tables.md b/reference/database/system-tables.md index 683dfb6e..84da6dd0 100644 --- a/reference/database/system-tables.md +++ b/reference/database/system-tables.md @@ -14,7 +14,7 @@ System tables are prefixed with `hdb_` and reside in the `system` database. ## Analytics Tables -Added in: v4.5.0 (resource and storage analytics expansion) + (resource and storage analytics expansion) ### `hdb_raw_analytics` @@ -121,7 +121,7 @@ For a full reference of available metrics and their fields, see [Analytics](../a ### `hdb_dataloader_hash` -Added in: v4.6.0 + Used internally by the [Data Loader](./data-loader.md) to track which records have been loaded and detect changes. Stores SHA-256 content hashes of data file records so that unchanged records are not re-written on subsequent deployments. diff --git a/reference/database/transaction.md b/reference/database/transaction.md index 3ae8847f..e65fb626 100644 --- a/reference/database/transaction.md +++ b/reference/database/transaction.md @@ -118,9 +118,9 @@ The `original_records` field contains the record state before the operation was Deletes audit log entries older than the specified timestamp. 
-Changed in: v4.3.0 — Audit log cleanup improved to reduce resource consumption during scheduled cleanups + — Audit log cleanup improved to reduce resource consumption during scheduled cleanups -Changed in: v4.5.0 — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold + — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold ```json { diff --git a/reference/graphql-querying/overview.md b/reference/graphql-querying/overview.md index 917f0e54..fb56ffc3 100644 --- a/reference/graphql-querying/overview.md +++ b/reference/graphql-querying/overview.md @@ -10,9 +10,9 @@ title: GraphQL Querying # GraphQL Querying -Added in: v4.4.0 (provisional) + (provisional) -Changed in: v4.5.0 (disabled by default, configuration options) + (disabled by default, configuration options) Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../components/applications.md), and for querying [Resources](../resources/overview.md). diff --git a/reference/http/api.md b/reference/http/api.md index bde865df..fb1b896a 100644 --- a/reference/http/api.md +++ b/reference/http/api.md @@ -243,7 +243,7 @@ A Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`t ## `server.authenticateUser(username, password)` -Added in: v4.5.0 + ```ts server.authenticateUser(username: string, password: string): Promise diff --git a/reference/http/configuration.md b/reference/http/configuration.md index abc21a29..880370cc 100644 --- a/reference/http/configuration.md +++ b/reference/http/configuration.md @@ -50,7 +50,7 @@ tls: ### `http.http2` -Added in: v4.5.0 + Type: `boolean` @@ -110,7 +110,7 @@ The maximum estimated request queue time in milliseconds. 
When the queue exceeds ### `http.compressionThreshold` -Added in: v4.2.0 + Type: `number` @@ -151,7 +151,7 @@ http: ### `http.corsAccessControlAllowHeaders` -Added in: v4.5.0 + Type: `string` @@ -163,7 +163,7 @@ Comma-separated list of headers allowed in the [`Access-Control-Allow-Headers`]( ### `http.sessionAffinity` -Added in: v4.1.0 + Type: `string` @@ -189,7 +189,7 @@ If Harper is behind a reverse proxy and you use `ip`, all requests will share th ### `http.mtls` -Added in: v4.3.0 + Type: `boolean | object` @@ -212,7 +212,7 @@ For granular control, specify an object: ### `http.mtls.certificateVerification` -Added in: v4.7.0 (OCSP support) + (OCSP support) Type: `boolean | object` @@ -281,7 +281,7 @@ HTTP request logging is disabled by default. Enabling the `http.logging` block t ### `http.logging` -Added in: v4.6.0 + Type: `object` diff --git a/reference/http/overview.md b/reference/http/overview.md index 06858a8c..f47f0836 100644 --- a/reference/http/overview.md +++ b/reference/http/overview.md @@ -14,9 +14,9 @@ Harper includes a built-in HTTP server that serves as the primary interface for Harper's HTTP server is multi-threaded. Each thread runs an independent copy of the HTTP stack, and incoming connections are distributed across threads using `SO_REUSEPORT` socket sharing — the most performant mechanism available for multi-threaded socket handling. -Added in: v4.1.0 (worker threads for HTTP requests) + (worker threads for HTTP requests) -Changed in: v4.2.0 (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) + (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) In previous versions: Session-affinity based socket delegation was used to route requests. This has been deprecated in favor of `SO_REUSEPORT`. 
@@ -44,7 +44,7 @@ See [Configuration](./configuration) for TLS options and [Security](../security/ ## HTTP/2 -Added in: v4.5.0 + HTTP/2 can be enabled with the `http2: true` option in `harperdb-config.yaml`. When enabled, HTTP/2 applies to all API endpoints served on `http.securePort` (HTTP/2 requires TLS). diff --git a/reference/logging/configuration.md b/reference/logging/configuration.md index d32b0f28..d556a4fe 100644 --- a/reference/logging/configuration.md +++ b/reference/logging/configuration.md @@ -182,7 +182,7 @@ logging: ### `logging.auditAuthEvents.logFailed` -Added in: v4.2.0 + Type: `boolean` @@ -198,7 +198,7 @@ Example log entry: ### `logging.auditAuthEvents.logSuccessful` -Added in: v4.2.0 + Type: `boolean` @@ -221,7 +221,7 @@ logging: ## Per-Component Logging -Added in: v4.6.0 + Harper supports independent logging configurations for different components. Each component logger can have its own `path`, `root`, `level`, `tag`, and `stdStreams` settings. All components default to the main `logging` configuration unless overridden. diff --git a/reference/logging/overview.md b/reference/logging/overview.md index 862aa01c..8d276e80 100644 --- a/reference/logging/overview.md +++ b/reference/logging/overview.md @@ -14,7 +14,7 @@ Harper's core logging system is used for diagnostics, monitoring, and observabil ## Log File -Changed in: v4.1.0 — All logs consolidated into a single `hdb.log` file + — All logs consolidated into a single `hdb.log` file All standard log output is written to `/log/hdb.log` (default: `~/hdb/log/hdb.log`). @@ -60,7 +60,7 @@ The default log level is `warn`. Setting a level includes that level and all mor ## Standard Streams -Changed in: v4.6.0 + By default, logs are written only to the log file. To also log to `stdout`/`stderr`, set [`logging.stdStreams: true`](./configuration.md#loggingstdstreams) (this is automatically enabled by the `DEFAULT_MODE=dev` configuration during installation). 
diff --git a/reference/mqtt/configuration.md b/reference/mqtt/configuration.md index 93205ee9..4b43c711 100644 --- a/reference/mqtt/configuration.md +++ b/reference/mqtt/configuration.md @@ -78,7 +78,7 @@ mqtt: ### `mqtt.network.mtls` -Added in: v4.3.0 + Type: `boolean | object` diff --git a/reference/mqtt/overview.md b/reference/mqtt/overview.md index 3da53187..ffa6537b 100644 --- a/reference/mqtt/overview.md +++ b/reference/mqtt/overview.md @@ -8,7 +8,7 @@ title: MQTT -Added in: v4.2.0 + Harper includes a built-in MQTT broker that provides real-time pub/sub messaging deeply integrated with the database. Unlike a generic MQTT broker, Harper's MQTT implementation connects topics directly to database records — publishing to a topic writes to the database, and subscribing to a topic delivers live updates for the corresponding record. @@ -53,7 +53,7 @@ Harper supports multi-level topics for both publishing and subscribing: ### Last Will -Added in: v4.3.0 + Harper supports the MQTT Last Will and Testament feature. If a client disconnects unexpectedly, the broker publishes the configured will message on its behalf. diff --git a/reference/operations-api/operations.md b/reference/operations-api/operations.md index 690aeecd..cd727625 100644 --- a/reference/operations-api/operations.md +++ b/reference/operations-api/operations.md @@ -961,7 +961,7 @@ The following operations are deprecated and should not be used in new code. Custom Functions were the precursor to the Component architecture introduced in v4.2.0. These operations are preserved for backward compatibility. -Deprecated in: v4.2.0 (moved to legacy in v4.7+) + (moved to legacy in v4.7+) For modern equivalents, see [Components Overview](../components/overview.md). 
diff --git a/reference/replication/clustering.md b/reference/replication/clustering.md index 98937eef..05bc4c1a 100644 --- a/reference/replication/clustering.md +++ b/reference/replication/clustering.md @@ -122,7 +122,7 @@ Removes a Harper node from the cluster and stops all replication to and from tha Returns an array of status objects from the cluster, including active WebSocket connections and replication timing statistics. -Added in: v4.4.0; timing statistics added in v4.5.0 + ; timing statistics added in v4.5.0 **Parameters**: diff --git a/reference/replication/overview.md b/reference/replication/overview.md index 7847ecc9..0139d967 100644 --- a/reference/replication/overview.md +++ b/reference/replication/overview.md @@ -135,7 +135,7 @@ Credentials are not stored — they are discarded immediately after use. You can ### Revoking Certificates -Added in: v4.5.0 + Certificates used in replication can be revoked using the certificate serial number. Use either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config: @@ -242,7 +242,7 @@ Update a subscription with `update_node`: ## Monitoring Replication -Added in: v4.5.0 (cluster status timing statistics) + (cluster status timing statistics) Use `cluster_status` to monitor the state of replication: diff --git a/reference/replication/sharding.md b/reference/replication/sharding.md index 6625045a..59efbfd8 100644 --- a/reference/replication/sharding.md +++ b/reference/replication/sharding.md @@ -8,9 +8,9 @@ title: Sharding # Sharding -Added in: v4.4.0 (provisional) + (provisional) -Changed in: v4.5.0 — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. 
+ — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. Harper's replication system supports sharding — storing different data across different subsets of nodes — while still allowing data to be accessed from any node in the cluster. This enables horizontal scalability for storage and write performance, while maintaining optimal data locality and consistency. diff --git a/reference/resources/overview.md b/reference/resources/overview.md index 09f47e5b..e6e22c13 100644 --- a/reference/resources/overview.md +++ b/reference/resources/overview.md @@ -13,7 +13,7 @@ Harper's Resource API is the foundation for building custom data access logic an A **Resource** is a class that provides a unified interface for a set of records or entities. Harper's built-in tables extend the base `Resource` class, and you can extend either `Resource` or a table class to implement custom behavior for any data source — internal or external. -Added in: v4.2.0 + The Resource API is designed to mirror REST/HTTP semantics: methods map directly to HTTP verbs (`get`, `put`, `patch`, `post`, `delete`), making it straightforward to build API endpoints alongside custom data logic. diff --git a/reference/resources/query-optimization.md b/reference/resources/query-optimization.md index 6501565a..9951a3c9 100644 --- a/reference/resources/query-optimization.md +++ b/reference/resources/query-optimization.md @@ -7,7 +7,7 @@ title: Query Optimization # Query Optimization -Added in: v4.3.0 (query planning and execution improvements) + (query planning and execution improvements) Harper has powerful query functionality with excellent performance characteristics. Like any database, different queries can vary significantly in performance. Understanding how querying works helps you write queries that perform well as your dataset grows. 
@@ -89,7 +89,7 @@ type Brand @table { } ``` -Added in: v4.3.0 + ## Sorting diff --git a/reference/resources/resource-api.md b/reference/resources/resource-api.md index 8c047d48..8c9e28ef 100644 --- a/reference/resources/resource-api.md +++ b/reference/resources/resource-api.md @@ -13,7 +13,7 @@ title: Resource API # Resource API -Added in: v4.2.0 + The Resource API provides a unified JavaScript interface for accessing, querying, modifying, and subscribing to data resources in Harper. Tables extend the base `Resource` class, and all resource interactions — whether from HTTP requests, MQTT messages, or application code — flow through this interface. @@ -32,7 +32,7 @@ This page documents V2 behavior (`loadAsInstance = false`). For V1 (legacy insta ### V2 Behavioral Differences from V1 -Changed in: v4.6.0 (Resource API upgrades that formalized V2) + (Resource API upgrades that formalized V2) When `loadAsInstance = false`: @@ -111,7 +111,7 @@ put(target, data) { Called for HTTP PATCH requests. Merges `data` into the existing record, preserving any properties not included in `data`. -Added in: v4.3.0 (CRDT support for individual property updates via PATCH) + (CRDT support for individual property updates via PATCH) ### `post(target: RequestTarget | Id, data: object): void | Response` @@ -141,7 +141,7 @@ The `Updatable` class provides direct property access plus: Adds `value` to `property` using CRDT incrementation — safe for concurrent updates across threads and nodes. -Added in: v4.3.0 + ```javascript post(target, data) { @@ -286,7 +286,7 @@ Save a record (create or replace). The second form reads the primary key from th Create a new record with an auto-generated primary key. Returns the created record. Do not include a primary key in the `record` argument. -Added in: v4.2.0 + ### `patch(target: RequestTarget | Id, updates: object, context?): Promise` @@ -316,7 +316,7 @@ Query the table. 
See [Query Object](#query-object) below for available query opt Define the compute function for a `@computed` schema attribute. -Added in: v4.4.0 + ```javascript MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${record.lastName}`); @@ -326,7 +326,7 @@ MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${reco Returns the number of records in the table. By default returns an approximate (fast) count. Pass `{ exactCount: true }` for a precise count. -Added in: v4.5.0 + ### `sourcedFrom(Resource, options?)` @@ -352,7 +352,7 @@ static parsePath(path) { Set this static property to `true` to map the full URL (including query string) as the primary key, bypassing query parsing. -Added in: v4.5.0 (documented in improved URL path parsing) + (documented in improved URL path parsing) ```javascript export class MyTable extends tables.MyTable { @@ -415,7 +415,7 @@ Product.search({ Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); ``` -Added in: v4.3.0 + ### `operator` @@ -511,7 +511,7 @@ return { status: 200, headers: { 'X-Custom-Header': 'value' }, data: { message: `body` must be a string, `Buffer`, Node.js stream, or `ReadableStream`. `data` is an object that will be serialized. -Added in: v4.4.0 + ### Throwing Errors diff --git a/reference/rest/overview.md b/reference/rest/overview.md index e38b2e4c..a2121f98 100644 --- a/reference/rest/overview.md +++ b/reference/rest/overview.md @@ -9,7 +9,7 @@ title: REST Overview # REST Overview -Added in: v4.2.0 + Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation over HTTP, providing the best performance and HTTP interoperability with different clients. 
@@ -47,7 +47,7 @@ The REST interface follows a consistent URL structure: | `/my-resource/record-id/` | Trailing slash — the collection of records with the given id prefix | | `/my-resource/record-id/with/multiple/parts` | Record id with multiple path segments | -Changed in: v4.5.0 — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. + — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. ## HTTP Methods @@ -107,7 +107,7 @@ Content-Type: application/json Partially update a record, merging only the provided properties (CRDT-style update). Unspecified properties are preserved. -Added in: v4.3.0 + ```http PATCH /MyTable/123 @@ -140,7 +140,7 @@ See [Content Types](./content-types.md) for the full list of supported formats a ## OpenAPI -Added in: v4.3.0 + Harper automatically generates an OpenAPI specification for all resources exported via a schema. This endpoint is available at: diff --git a/reference/rest/querying.md b/reference/rest/querying.md index 37ad0b4d..b11f0ff4 100644 --- a/reference/rest/querying.md +++ b/reference/rest/querying.md @@ -27,7 +27,7 @@ GET /Product/?category=software&inStock=true ### Null Queries -Added in: v4.3.0 + Query for null values or non-null values: @@ -173,11 +173,11 @@ GET /Product/?rating=gt=3&sort(+name) GET /Product/?sort(+rating,-price) ``` -Added in: v4.3.0 + ## Relationships and Joins -Added in: v4.3.0 + Harper supports querying across related tables through dot-syntax chained attributes. Relationships must be defined in the schema using `@relation`. @@ -237,7 +237,7 @@ The array order of `resellerIds` is preserved when resolving the relationship. 
## Property Access via URL -Changed in: v4.5.0 + Access a specific property of a record by appending it with dot syntax to the record id: @@ -249,7 +249,7 @@ This only works for properties declared in the schema. As of v4.5.0, dots in URL ## `directURLMapping` Option -Added in: v4.5.0 + Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](../database/schema.md) for configuration details. diff --git a/reference/rest/server-sent-events.md b/reference/rest/server-sent-events.md index bdffaa1f..7dc98a0e 100644 --- a/reference/rest/server-sent-events.md +++ b/reference/rest/server-sent-events.md @@ -7,7 +7,7 @@ title: Server-Sent Events # Server-Sent Events -Added in: v4.2.0 + Harper supports Server-Sent Events (SSE), a simple and efficient mechanism for browser-based applications to receive real-time updates from the server over a standard HTTP connection. SSE is a one-directional transport — the server pushes events to the client, and the client has no way to send messages back on the same connection. diff --git a/reference/rest/websockets.md b/reference/rest/websockets.md index 005b6795..2675e57e 100644 --- a/reference/rest/websockets.md +++ b/reference/rest/websockets.md @@ -8,7 +8,7 @@ title: WebSockets # WebSockets -Added in: v4.2.0 + Harper supports WebSocket connections through the REST interface, enabling real-time bidirectional communication with resources. WebSocket connections target a resource URL path — by default, connecting to a resource subscribes to changes for that resource. 
diff --git a/reference/security/certificate-management.md b/reference/security/certificate-management.md index 79f254b8..b7357e36 100644 --- a/reference/security/certificate-management.md +++ b/reference/security/certificate-management.md @@ -87,7 +87,7 @@ For full mTLS authentication details, see [mTLS Authentication](./mtls-authentic ## Certificate Verification -Added in: v4.5.0 (certificate revocation); v4.7.0 (OCSP support) + (certificate revocation); v4.7.0 (OCSP support) When using mTLS, enable certificate verification to ensure revoked certificates cannot authenticate even if still within their validity period: @@ -122,7 +122,7 @@ For full configuration options and troubleshooting, see [Certificate Verificatio ## Dynamic Certificate Management -Added in: v4.4.0 + Certificates — including CAs and private keys — can be dynamically managed without restarting Harper. diff --git a/reference/security/certificate-verification.md b/reference/security/certificate-verification.md index e2ee6ad2..2b00542f 100644 --- a/reference/security/certificate-verification.md +++ b/reference/security/certificate-verification.md @@ -7,9 +7,9 @@ title: Certificate Verification -Added in: v4.5.0 + -Changed in: v4.7.0 (OCSP support added) + (OCSP support added) Certificate verification (also called certificate revocation checking) ensures that revoked certificates cannot be used for mTLS authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date — due to compromise, employee departure, or other security concerns. 
diff --git a/reference/security/configuration.md b/reference/security/configuration.md index 717bc180..e77aff2b 100644 --- a/reference/security/configuration.md +++ b/reference/security/configuration.md @@ -35,7 +35,7 @@ How long (in milliseconds) an authentication result — a particular `Authorizat _Type: boolean — Default: `true`_ -Added in: v4.2.0 + Enables cookie-based sessions to maintain an authenticated session across requests. This is the preferred authentication mechanism for web browsers: cookies hold the token securely without exposing it to JavaScript, reducing XSS vulnerability risk. @@ -55,7 +55,7 @@ How long a JWT refresh token remains valid before expiring. Accepts [`jsonwebtok _Type: string — Default: `sha256`_ -Added in: v4.5.0 + Password hashing algorithm used when storing user passwords. Replaced the previous MD5 hashing. Options: diff --git a/reference/security/mtls-authentication.md b/reference/security/mtls-authentication.md index d1f202e9..2e89dcbc 100644 --- a/reference/security/mtls-authentication.md +++ b/reference/security/mtls-authentication.md @@ -6,7 +6,7 @@ title: mTLS Authentication -Added in: v4.3.0 + Harper supports Mutual TLS (mTLS) authentication for incoming HTTP connections. When enabled, the client must present a certificate signed by a trusted Certificate Authority (CA). If the certificate is valid and trusted, the connection is authenticated using the user whose username matches the `CN` (Common Name) from the client certificate's `subject`. 
diff --git a/reference/static-files/overview.md b/reference/static-files/overview.md index 2d0ea9f5..7104e0c6 100644 --- a/reference/static-files/overview.md +++ b/reference/static-files/overview.md @@ -44,7 +44,7 @@ Files are accessed relative to the matched directory root, so `GET /index.html` ## `files` and `urlPath` Options -Added in: v4.5 + `static` is a [Plugin](../components/overview.md) and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. @@ -62,7 +62,7 @@ See [Components Overview](../components/overview.md) for full `files` glob patte ## Additional Options -Added in: v4.7 + In addition to the standard `files`, `urlPath`, and `timeout` options, `static` supports these configuration options: @@ -76,7 +76,7 @@ In addition to the standard `files`, `urlPath`, and `timeout` options, `static` ## Auto-Updates -Added in: v4.7.0 + Because `static` uses the Plugin API, it automatically responds to changes without requiring a Harper restart. Adding, removing, or modifying files — or updating `config.yaml` — takes effect immediately. 
diff --git a/reference/users-and-roles/configuration.md b/reference/users-and-roles/configuration.md index 8177180b..37565b65 100644 --- a/reference/users-and-roles/configuration.md +++ b/reference/users-and-roles/configuration.md @@ -52,7 +52,7 @@ editor: ## Password Hashing -Added in: v4.5.0 + Harper supports two password hashing algorithms, replacing the previous MD5 hashing: diff --git a/reference_versioned_docs/version-v4/analytics/overview.md b/reference_versioned_docs/version-v4/analytics/overview.md index 679e70f4..185c5df0 100644 --- a/reference_versioned_docs/version-v4/analytics/overview.md +++ b/reference_versioned_docs/version-v4/analytics/overview.md @@ -7,7 +7,7 @@ title: Analytics -Added in: v4.5.0 (resource and storage analytics) + (resource and storage analytics) Harper collects real-time telemetry and statistics across all operations, URL endpoints, and messaging topics. This data can be used to monitor server health, understand traffic and usage patterns, identify resource-intensive queries, and inform scaling decisions. diff --git a/reference_versioned_docs/version-v4/cli/commands.md b/reference_versioned_docs/version-v4/cli/commands.md index 3f1fdc94..b7e45f29 100644 --- a/reference_versioned_docs/version-v4/cli/commands.md +++ b/reference_versioned_docs/version-v4/cli/commands.md @@ -15,7 +15,7 @@ This page documents the core Harper CLI commands for managing Harper instances. ### `harper` -Added in: v4.1.0 + Run Harper in the foreground as a standard process. This is the recommended way to run Harper. @@ -66,7 +66,7 @@ For more information on installation, see [Getting Started / Install and Connect ### `harper run` -Added in: v4.2.0 + Run a Harper application from any location as a foreground, standard process (similar to `harper`). 
@@ -78,7 +78,7 @@ This command runs Harper with the specified application directory without automa ### `harper dev` -Added in: v4.2.0 + Run Harper in development mode from a specified directory with automatic reloading. Recommended for local application development. Operates similar to `harper` and `harper run`. diff --git a/reference_versioned_docs/version-v4/cli/operations-api-commands.md b/reference_versioned_docs/version-v4/cli/operations-api-commands.md index bdfd49f5..bdf01d6b 100644 --- a/reference_versioned_docs/version-v4/cli/operations-api-commands.md +++ b/reference_versioned_docs/version-v4/cli/operations-api-commands.md @@ -7,7 +7,7 @@ title: Operations API Commands # Operations API Commands -Added in: v4.3.0 + The Harper CLI supports executing operations from the [Operations API](../operations-api/overview.md) directly from the command line. This enables powerful automation and scripting capabilities. diff --git a/reference_versioned_docs/version-v4/cli/overview.md b/reference_versioned_docs/version-v4/cli/overview.md index 5571becc..73bbd224 100644 --- a/reference_versioned_docs/version-v4/cli/overview.md +++ b/reference_versioned_docs/version-v4/cli/overview.md @@ -28,7 +28,7 @@ For detailed installation instructions, see the [Getting Started / Install And C ## Command Name -Changed in: v4.7.0 + The CLI command is `harper`. From v4.1.0 to v4.6.x, the command was only available as `harperdb`. Starting in v4.7.0, the preferred command is `harper`, though `harperdb` continues to work as an alias for backward compatibility. @@ -113,7 +113,7 @@ See [CLI Commands](./commands.md) for detailed documentation on each command. ## Operations API Commands -Added in: v4.3.0 + The Harper CLI supports executing most operations from the [Operations API](../operations-api/overview.md) directly from the command line. This includes operations that do not require complex nested parameters. 
@@ -147,7 +147,7 @@ See [Operations API Commands](./operations-api-commands.md) for the complete lis ## Remote Operations -Changed in: v4.3.0 (expanded remote operations support) + (expanded remote operations support) The CLI can execute operations on remote Harper instances by passing the `target` parameter with the HTTP address of the remote instance. @@ -174,7 +174,7 @@ harper describe_database database=dev target=https://server.com:9925 username=HD ## Development Mode -Added in: v4.2.0 + For local application and component development, use `harper dev`: diff --git a/reference_versioned_docs/version-v4/components/applications.md b/reference_versioned_docs/version-v4/components/applications.md index 5df64215..7c7a12c9 100644 --- a/reference_versioned_docs/version-v4/components/applications.md +++ b/reference_versioned_docs/version-v4/components/applications.md @@ -18,7 +18,7 @@ Harper offers several approaches to managing applications that differ between lo ### `dev` and `run` Commands -Added in: v4.2.0 + The quickest way to run an application locally is with the `dev` command inside the application directory: diff --git a/reference_versioned_docs/version-v4/components/javascript-environment.md b/reference_versioned_docs/version-v4/components/javascript-environment.md index cd6f75b1..55447cc9 100644 --- a/reference_versioned_docs/version-v4/components/javascript-environment.md +++ b/reference_versioned_docs/version-v4/components/javascript-environment.md @@ -52,7 +52,7 @@ See [Transactions](../database/transaction.md) for full reference. ### `createBlob(data, options?)` -Added in: v4.5.0 + Creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, etc.) in `Blob`-typed schema fields. 
diff --git a/reference_versioned_docs/version-v4/components/overview.md b/reference_versioned_docs/version-v4/components/overview.md index d41b7475..aa5f7630 100644 --- a/reference_versioned_docs/version-v4/components/overview.md +++ b/reference_versioned_docs/version-v4/components/overview.md @@ -18,13 +18,13 @@ title: Components ### Applications -Added in: v4.2.0 + **Applications** implement specific user-facing features or functionality. Applications are built on top of extensions and represent the end product that users interact with. For example, a Next.js application serving a web interface or an Apollo GraphQL server providing a GraphQL API are both applications. Also, a collection of Harper Schemas and/or custom Resources is also an application. ### Extensions -Added in: v4.2.0 + **Extensions** are the building blocks of the Harper component system. Applications depend on extensions to provide the functionality they implement. For example, the built-in `graphqlSchema` extension enables applications to define databases and tables using GraphQL schemas. The `@harperdb/nextjs` and `@harperdb/apollo` extensions provide building blocks for Next.js and Apollo applications respectively. @@ -32,7 +32,7 @@ Extensions can also depend on other extensions. For example, `@harperdb/apollo` ### Plugins (Experimental) -Added in: v4.6.0 (experimental) + (experimental) **Plugins** are a new iteration of the extension system introduced in v4.6. They are simultaneously a simplification and extensibility upgrade over extensions. Instead of defining multiple methods (`start` vs `startOnMainThread`, `handleFile` vs `setupFile`, `handleDirectory` vs `setupDirectory`), plugins only export a single `handleApplication` method. @@ -149,7 +149,7 @@ Extensions require an `extensionModule` option pointing to the extension source. 
## Component Status Monitoring -Added in: v4.7.0 + Harper collects status from each component at load time and tracks any registered status change notifications. This provides visibility into the health and state of running components. diff --git a/reference_versioned_docs/version-v4/components/plugin-api.md b/reference_versioned_docs/version-v4/components/plugin-api.md index c92cf839..69413101 100644 --- a/reference_versioned_docs/version-v4/components/plugin-api.md +++ b/reference_versioned_docs/version-v4/components/plugin-api.md @@ -8,7 +8,7 @@ title: Plugin API # Plugin API -Added in: v4.6.0 (experimental) + (experimental) > The Plugin API is **experimental**. It is the recommended approach for building new extensions, and is intended to replace the [Extension API](./extension-api.md) in the future. Both systems are supported simultaneously. diff --git a/reference_versioned_docs/version-v4/configuration/overview.md b/reference_versioned_docs/version-v4/configuration/overview.md index 747e681c..4604afec 100644 --- a/reference_versioned_docs/version-v4/configuration/overview.md +++ b/reference_versioned_docs/version-v4/configuration/overview.md @@ -94,7 +94,7 @@ HDB_CONFIG=/existing/rootpath/harperdb-config.yaml harperdb ## Environment Variable-Based Configuration -Added in: v4.7.2 + Harper provides two special environment variables for managing configuration across deployments: `HARPER_DEFAULT_CONFIG` and `HARPER_SET_CONFIG`. Both accept JSON-formatted configuration that mirrors the structure of `harperdb-config.yaml`. 
diff --git a/reference_versioned_docs/version-v4/database/api.md b/reference_versioned_docs/version-v4/database/api.md index bb8c225d..11bfbe70 100644 --- a/reference_versioned_docs/version-v4/database/api.md +++ b/reference_versioned_docs/version-v4/database/api.md @@ -136,7 +136,7 @@ For deeper background on Harper's transaction model, see [Storage Algorithm](./s ## `createBlob(data, options?)` -Added in: v4.5.0 + `createBlob` creates a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob) backed by Harper's storage engine. Use it to store large binary content (images, audio, video, large HTML, etc.) in a `Blob`-typed schema field. diff --git a/reference_versioned_docs/version-v4/database/compaction.md b/reference_versioned_docs/version-v4/database/compaction.md index a4ede5f2..faa9946d 100644 --- a/reference_versioned_docs/version-v4/database/compaction.md +++ b/reference_versioned_docs/version-v4/database/compaction.md @@ -7,7 +7,7 @@ title: Compaction # Compaction -Added in: v4.3.0 + Database files grow over time as records are inserted, updated, and deleted. Deleted records and updated values leave behind free space (fragmentation) in the database file, which can increase file size and potentially affect performance. Compaction eliminates this free space, creating a smaller, contiguous database file. diff --git a/reference_versioned_docs/version-v4/database/data-loader.md b/reference_versioned_docs/version-v4/database/data-loader.md index 962b3706..722eb2fa 100644 --- a/reference_versioned_docs/version-v4/database/data-loader.md +++ b/reference_versioned_docs/version-v4/database/data-loader.md @@ -7,7 +7,7 @@ title: Data Loader # Data Loader -Added in: v4.6.0 + The Data Loader is a built-in component that loads data from JSON or YAML files into Harper tables as part of component deployment. 
It is designed for seeding tables with initial records — configuration data, reference data, default users, or other records that should exist when a component is first deployed or updated. diff --git a/reference_versioned_docs/version-v4/database/jobs.md b/reference_versioned_docs/version-v4/database/jobs.md index 63fd8e0e..7ce15189 100644 --- a/reference_versioned_docs/version-v4/database/jobs.md +++ b/reference_versioned_docs/version-v4/database/jobs.md @@ -138,7 +138,7 @@ Exports table data to a local file in JSON or CSV format. - `path` _(required)_ — local directory path where the export file will be written - `search_operation` _(required)_ — query to select records: `search_by_hash`, `search_by_value`, `search_by_conditions`, or `sql` -Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation for exports + — `search_by_conditions` added as a supported search operation for exports - `filename` _(optional)_ — filename without extension; auto-generated from epoch timestamp if omitted @@ -160,7 +160,7 @@ Changed in: v4.3.0 — `search_by_conditions` added as a supported search operat Exports table data to an AWS S3 bucket in JSON or CSV format. -Changed in: v4.3.0 — `search_by_conditions` added as a supported search operation + — `search_by_conditions` added as a supported search operation - `operation` _(required)_ — `export_to_s3` - `format` _(required)_ — `json` or `csv` diff --git a/reference_versioned_docs/version-v4/database/schema.md b/reference_versioned_docs/version-v4/database/schema.md index 905aa1f9..b29f5d59 100644 --- a/reference_versioned_docs/version-v4/database/schema.md +++ b/reference_versioned_docs/version-v4/database/schema.md @@ -18,7 +18,7 @@ Harper uses GraphQL Schema Definition Language (SDL) to declaratively define tab ## Overview -Added in: v4.2.0 + Schemas are defined using standard [GraphQL type definitions](https://graphql.org/learn/schema/) with Harper-specific directives. 
A schema definition: @@ -157,7 +157,7 @@ If no primary key is provided on insert, Harper auto-generates one: - **UUID string** — when type is `String` or `ID` - **Auto-incrementing integer** — when type is `Int`, `Long`, or `Any` -Changed in: v4.4.0 + Auto-incrementing integer primary keys were added. Previously only UUID generation was supported for `ID` and `String` types. @@ -203,7 +203,7 @@ type Event @table { ## Relationships -Added in: v4.3.0 + The `@relationship` directive defines how one table relates to another through a foreign key. Relationships enable join queries and allow related records to be selected as nested properties in query results. @@ -277,7 +277,7 @@ Schemas can also define self-referential relationships, enabling parent-child hi ## Computed Properties -Added in: v4.4.0 + The `@computed` directive marks a field as derived from other fields at query time. Computed properties are not stored in the database but are evaluated when the field is accessed. @@ -334,7 +334,7 @@ Increment `version` whenever the computation function changes. Failing to do so ## Vector Indexing -Added in: v4.6.0 + Use `@indexed(type: "HNSW")` to create a vector index using the Hierarchical Navigable Small World algorithm, designed for fast approximate nearest-neighbor search on high-dimensional vectors. @@ -410,7 +410,7 @@ Arrays of a type are expressed with `[Type]` syntax (e.g., `[Float]` for a vecto ### Blob Type -Added in: v4.5.0 + `Blob` fields are designed for large binary content. Harper's `Blob` type implements the [Web API `Blob` interface](https://developer.mozilla.org/en-US/docs/Web/API/Blob), so all standard `Blob` methods (`.text()`, `.arrayBuffer()`, `.stream()`, `.slice()`) are available. Unlike `Bytes`, blobs are stored separately from the record, support streaming, and do not need to be held entirely in memory. Use `Blob` for content typically larger than 20KB (images, video, audio, large HTML, etc.). 
diff --git a/reference_versioned_docs/version-v4/database/storage-algorithm.md b/reference_versioned_docs/version-v4/database/storage-algorithm.md index 35af971b..3a3e0087 100644 --- a/reference_versioned_docs/version-v4/database/storage-algorithm.md +++ b/reference_versioned_docs/version-v4/database/storage-algorithm.md @@ -26,7 +26,7 @@ Each Harper table has a single writer process, eliminating deadlocks and ensurin ## Universally Indexed -Changed in: v4.3.0 — Storage performance improvements including better free-space management + — Storage performance improvements including better free-space management For [dynamic schema tables](./overview.md#dynamic-vs-defined-schemas), all top-level attributes are automatically indexed immediately upon ingestion — Harper reflexively creates the attribute and its index as new data arrives. For [schema-defined tables](./schema.md), indexes are created for all attributes marked with `@indexed`. @@ -42,7 +42,7 @@ Within the LMDB implementation, table records are grouped into a single LMDB env ## Compression -Changed in: v4.3.0 — Compression is now enabled by default for all records over 4KB + — Compression is now enabled by default for all records over 4KB Harper compresses record data automatically for records over 4KB. Compression settings can be configured in the [storage configuration](../configuration/options.md). Note that compression settings cannot be changed on existing databases without creating a new compacted copy — see [Compaction](./compaction.md). diff --git a/reference_versioned_docs/version-v4/database/system-tables.md b/reference_versioned_docs/version-v4/database/system-tables.md index 683dfb6e..84da6dd0 100644 --- a/reference_versioned_docs/version-v4/database/system-tables.md +++ b/reference_versioned_docs/version-v4/database/system-tables.md @@ -14,7 +14,7 @@ System tables are prefixed with `hdb_` and reside in the `system` database. 
## Analytics Tables -Added in: v4.5.0 (resource and storage analytics expansion) + (resource and storage analytics expansion) ### `hdb_raw_analytics` @@ -121,7 +121,7 @@ For a full reference of available metrics and their fields, see [Analytics](../a ### `hdb_dataloader_hash` -Added in: v4.6.0 + Used internally by the [Data Loader](./data-loader.md) to track which records have been loaded and detect changes. Stores SHA-256 content hashes of data file records so that unchanged records are not re-written on subsequent deployments. diff --git a/reference_versioned_docs/version-v4/database/transaction.md b/reference_versioned_docs/version-v4/database/transaction.md index 3ae8847f..e65fb626 100644 --- a/reference_versioned_docs/version-v4/database/transaction.md +++ b/reference_versioned_docs/version-v4/database/transaction.md @@ -118,9 +118,9 @@ The `original_records` field contains the record state before the operation was Deletes audit log entries older than the specified timestamp. -Changed in: v4.3.0 — Audit log cleanup improved to reduce resource consumption during scheduled cleanups + — Audit log cleanup improved to reduce resource consumption during scheduled cleanups -Changed in: v4.5.0 — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold + — Storage reclamation: Harper automatically evicts older audit log entries when free storage drops below a configurable threshold ```json { diff --git a/reference_versioned_docs/version-v4/graphql-querying/overview.md b/reference_versioned_docs/version-v4/graphql-querying/overview.md index 917f0e54..fb56ffc3 100644 --- a/reference_versioned_docs/version-v4/graphql-querying/overview.md +++ b/reference_versioned_docs/version-v4/graphql-querying/overview.md @@ -10,9 +10,9 @@ title: GraphQL Querying # GraphQL Querying -Added in: v4.4.0 (provisional) + (provisional) -Changed in: v4.5.0 (disabled by default, configuration options) + (disabled by default, 
configuration options) Harper supports GraphQL in a variety of ways. It can be used for [defining schemas](../components/applications.md), and for querying [Resources](../resources/overview.md). diff --git a/reference_versioned_docs/version-v4/http/api.md b/reference_versioned_docs/version-v4/http/api.md index bde865df..fb1b896a 100644 --- a/reference_versioned_docs/version-v4/http/api.md +++ b/reference_versioned_docs/version-v4/http/api.md @@ -243,7 +243,7 @@ A Node.js [`net.Server`](https://nodejs.org/api/net.html#class-netserver) or [`t ## `server.authenticateUser(username, password)` -Added in: v4.5.0 + ```ts server.authenticateUser(username: string, password: string): Promise diff --git a/reference_versioned_docs/version-v4/http/configuration.md b/reference_versioned_docs/version-v4/http/configuration.md index abc21a29..880370cc 100644 --- a/reference_versioned_docs/version-v4/http/configuration.md +++ b/reference_versioned_docs/version-v4/http/configuration.md @@ -50,7 +50,7 @@ tls: ### `http.http2` -Added in: v4.5.0 + Type: `boolean` @@ -110,7 +110,7 @@ The maximum estimated request queue time in milliseconds. When the queue exceeds ### `http.compressionThreshold` -Added in: v4.2.0 + Type: `number` @@ -151,7 +151,7 @@ http: ### `http.corsAccessControlAllowHeaders` -Added in: v4.5.0 + Type: `string` @@ -163,7 +163,7 @@ Comma-separated list of headers allowed in the [`Access-Control-Allow-Headers`]( ### `http.sessionAffinity` -Added in: v4.1.0 + Type: `string` @@ -189,7 +189,7 @@ If Harper is behind a reverse proxy and you use `ip`, all requests will share th ### `http.mtls` -Added in: v4.3.0 + Type: `boolean | object` @@ -212,7 +212,7 @@ For granular control, specify an object: ### `http.mtls.certificateVerification` -Added in: v4.7.0 (OCSP support) + (OCSP support) Type: `boolean | object` @@ -281,7 +281,7 @@ HTTP request logging is disabled by default. 
Enabling the `http.logging` block t ### `http.logging` -Added in: v4.6.0 + Type: `object` diff --git a/reference_versioned_docs/version-v4/http/overview.md b/reference_versioned_docs/version-v4/http/overview.md index 06858a8c..f47f0836 100644 --- a/reference_versioned_docs/version-v4/http/overview.md +++ b/reference_versioned_docs/version-v4/http/overview.md @@ -14,9 +14,9 @@ Harper includes a built-in HTTP server that serves as the primary interface for Harper's HTTP server is multi-threaded. Each thread runs an independent copy of the HTTP stack, and incoming connections are distributed across threads using `SO_REUSEPORT` socket sharing — the most performant mechanism available for multi-threaded socket handling. -Added in: v4.1.0 (worker threads for HTTP requests) + (worker threads for HTTP requests) -Changed in: v4.2.0 (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) + (switched from process-per-thread model with session-affinity delegation to `SO_REUSEPORT` socket sharing) In previous versions: Session-affinity based socket delegation was used to route requests. This has been deprecated in favor of `SO_REUSEPORT`. @@ -44,7 +44,7 @@ See [Configuration](./configuration) for TLS options and [Security](../security/ ## HTTP/2 -Added in: v4.5.0 + HTTP/2 can be enabled with the `http2: true` option in `harperdb-config.yaml`. When enabled, HTTP/2 applies to all API endpoints served on `http.securePort` (HTTP/2 requires TLS). 
diff --git a/reference_versioned_docs/version-v4/logging/configuration.md b/reference_versioned_docs/version-v4/logging/configuration.md index d32b0f28..d556a4fe 100644 --- a/reference_versioned_docs/version-v4/logging/configuration.md +++ b/reference_versioned_docs/version-v4/logging/configuration.md @@ -182,7 +182,7 @@ logging: ### `logging.auditAuthEvents.logFailed` -Added in: v4.2.0 + Type: `boolean` @@ -198,7 +198,7 @@ Example log entry: ### `logging.auditAuthEvents.logSuccessful` -Added in: v4.2.0 + Type: `boolean` @@ -221,7 +221,7 @@ logging: ## Per-Component Logging -Added in: v4.6.0 + Harper supports independent logging configurations for different components. Each component logger can have its own `path`, `root`, `level`, `tag`, and `stdStreams` settings. All components default to the main `logging` configuration unless overridden. diff --git a/reference_versioned_docs/version-v4/logging/overview.md b/reference_versioned_docs/version-v4/logging/overview.md index 862aa01c..8d276e80 100644 --- a/reference_versioned_docs/version-v4/logging/overview.md +++ b/reference_versioned_docs/version-v4/logging/overview.md @@ -14,7 +14,7 @@ Harper's core logging system is used for diagnostics, monitoring, and observabil ## Log File -Changed in: v4.1.0 — All logs consolidated into a single `hdb.log` file + — All logs consolidated into a single `hdb.log` file All standard log output is written to `/log/hdb.log` (default: `~/hdb/log/hdb.log`). @@ -60,7 +60,7 @@ The default log level is `warn`. Setting a level includes that level and all mor ## Standard Streams -Changed in: v4.6.0 + By default, logs are written only to the log file. To also log to `stdout`/`stderr`, set [`logging.stdStreams: true`](./configuration.md#loggingstdstreams) (this is automatically enabled by the `DEFAULT_MODE=dev` configuration during installation). 
diff --git a/reference_versioned_docs/version-v4/mqtt/configuration.md b/reference_versioned_docs/version-v4/mqtt/configuration.md index 93205ee9..4b43c711 100644 --- a/reference_versioned_docs/version-v4/mqtt/configuration.md +++ b/reference_versioned_docs/version-v4/mqtt/configuration.md @@ -78,7 +78,7 @@ mqtt: ### `mqtt.network.mtls` -Added in: v4.3.0 + Type: `boolean | object` diff --git a/reference_versioned_docs/version-v4/mqtt/overview.md b/reference_versioned_docs/version-v4/mqtt/overview.md index 3da53187..ffa6537b 100644 --- a/reference_versioned_docs/version-v4/mqtt/overview.md +++ b/reference_versioned_docs/version-v4/mqtt/overview.md @@ -8,7 +8,7 @@ title: MQTT -Added in: v4.2.0 + Harper includes a built-in MQTT broker that provides real-time pub/sub messaging deeply integrated with the database. Unlike a generic MQTT broker, Harper's MQTT implementation connects topics directly to database records — publishing to a topic writes to the database, and subscribing to a topic delivers live updates for the corresponding record. @@ -53,7 +53,7 @@ Harper supports multi-level topics for both publishing and subscribing: ### Last Will -Added in: v4.3.0 + Harper supports the MQTT Last Will and Testament feature. If a client disconnects unexpectedly, the broker publishes the configured will message on its behalf. diff --git a/reference_versioned_docs/version-v4/operations-api/operations.md b/reference_versioned_docs/version-v4/operations-api/operations.md index 690aeecd..cd727625 100644 --- a/reference_versioned_docs/version-v4/operations-api/operations.md +++ b/reference_versioned_docs/version-v4/operations-api/operations.md @@ -961,7 +961,7 @@ The following operations are deprecated and should not be used in new code. Custom Functions were the precursor to the Component architecture introduced in v4.2.0. These operations are preserved for backward compatibility. 
-Deprecated in: v4.2.0 (moved to legacy in v4.7+) + (moved to legacy in v4.7+) For modern equivalents, see [Components Overview](../components/overview.md). diff --git a/reference_versioned_docs/version-v4/replication/clustering.md b/reference_versioned_docs/version-v4/replication/clustering.md index 98937eef..05bc4c1a 100644 --- a/reference_versioned_docs/version-v4/replication/clustering.md +++ b/reference_versioned_docs/version-v4/replication/clustering.md @@ -122,7 +122,7 @@ Removes a Harper node from the cluster and stops all replication to and from tha Returns an array of status objects from the cluster, including active WebSocket connections and replication timing statistics. -Added in: v4.4.0; timing statistics added in v4.5.0 + ; timing statistics added in v4.5.0 **Parameters**: diff --git a/reference_versioned_docs/version-v4/replication/overview.md b/reference_versioned_docs/version-v4/replication/overview.md index 7847ecc9..0139d967 100644 --- a/reference_versioned_docs/version-v4/replication/overview.md +++ b/reference_versioned_docs/version-v4/replication/overview.md @@ -135,7 +135,7 @@ Credentials are not stored — they are discarded immediately after use. You can ### Revoking Certificates -Added in: v4.5.0 + Certificates used in replication can be revoked using the certificate serial number. 
Use either the `revoked_certificates` attribute in the `hdb_nodes` system table or route config: @@ -242,7 +242,7 @@ Update a subscription with `update_node`: ## Monitoring Replication -Added in: v4.5.0 (cluster status timing statistics) + (cluster status timing statistics) Use `cluster_status` to monitor the state of replication: diff --git a/reference_versioned_docs/version-v4/replication/sharding.md b/reference_versioned_docs/version-v4/replication/sharding.md index 6625045a..59efbfd8 100644 --- a/reference_versioned_docs/version-v4/replication/sharding.md +++ b/reference_versioned_docs/version-v4/replication/sharding.md @@ -8,9 +8,9 @@ title: Sharding # Sharding -Added in: v4.4.0 (provisional) + (provisional) -Changed in: v4.5.0 — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. + — expanded sharding functionality: Harper now honors write requests with residency information that will not be stored on the local node, and nodes can be declaratively configured as part of a shard. Harper's replication system supports sharding — storing different data across different subsets of nodes — while still allowing data to be accessed from any node in the cluster. This enables horizontal scalability for storage and write performance, while maintaining optimal data locality and consistency. diff --git a/reference_versioned_docs/version-v4/resources/overview.md b/reference_versioned_docs/version-v4/resources/overview.md index 09f47e5b..e6e22c13 100644 --- a/reference_versioned_docs/version-v4/resources/overview.md +++ b/reference_versioned_docs/version-v4/resources/overview.md @@ -13,7 +13,7 @@ Harper's Resource API is the foundation for building custom data access logic an A **Resource** is a class that provides a unified interface for a set of records or entities. 
Harper's built-in tables extend the base `Resource` class, and you can extend either `Resource` or a table class to implement custom behavior for any data source — internal or external. -Added in: v4.2.0 + The Resource API is designed to mirror REST/HTTP semantics: methods map directly to HTTP verbs (`get`, `put`, `patch`, `post`, `delete`), making it straightforward to build API endpoints alongside custom data logic. diff --git a/reference_versioned_docs/version-v4/resources/query-optimization.md b/reference_versioned_docs/version-v4/resources/query-optimization.md index 6501565a..9951a3c9 100644 --- a/reference_versioned_docs/version-v4/resources/query-optimization.md +++ b/reference_versioned_docs/version-v4/resources/query-optimization.md @@ -7,7 +7,7 @@ title: Query Optimization # Query Optimization -Added in: v4.3.0 (query planning and execution improvements) + (query planning and execution improvements) Harper has powerful query functionality with excellent performance characteristics. Like any database, different queries can vary significantly in performance. Understanding how querying works helps you write queries that perform well as your dataset grows. @@ -89,7 +89,7 @@ type Brand @table { } ``` -Added in: v4.3.0 + ## Sorting diff --git a/reference_versioned_docs/version-v4/resources/resource-api.md b/reference_versioned_docs/version-v4/resources/resource-api.md index 8c047d48..8c9e28ef 100644 --- a/reference_versioned_docs/version-v4/resources/resource-api.md +++ b/reference_versioned_docs/version-v4/resources/resource-api.md @@ -13,7 +13,7 @@ title: Resource API # Resource API -Added in: v4.2.0 + The Resource API provides a unified JavaScript interface for accessing, querying, modifying, and subscribing to data resources in Harper. Tables extend the base `Resource` class, and all resource interactions — whether from HTTP requests, MQTT messages, or application code — flow through this interface. 
@@ -32,7 +32,7 @@ This page documents V2 behavior (`loadAsInstance = false`). For V1 (legacy insta ### V2 Behavioral Differences from V1 -Changed in: v4.6.0 (Resource API upgrades that formalized V2) + (Resource API upgrades that formalized V2) When `loadAsInstance = false`: @@ -111,7 +111,7 @@ put(target, data) { Called for HTTP PATCH requests. Merges `data` into the existing record, preserving any properties not included in `data`. -Added in: v4.3.0 (CRDT support for individual property updates via PATCH) + (CRDT support for individual property updates via PATCH) ### `post(target: RequestTarget | Id, data: object): void | Response` @@ -141,7 +141,7 @@ The `Updatable` class provides direct property access plus: Adds `value` to `property` using CRDT incrementation — safe for concurrent updates across threads and nodes. -Added in: v4.3.0 + ```javascript post(target, data) { @@ -286,7 +286,7 @@ Save a record (create or replace). The second form reads the primary key from th Create a new record with an auto-generated primary key. Returns the created record. Do not include a primary key in the `record` argument. -Added in: v4.2.0 + ### `patch(target: RequestTarget | Id, updates: object, context?): Promise` @@ -316,7 +316,7 @@ Query the table. See [Query Object](#query-object) below for available query opt Define the compute function for a `@computed` schema attribute. -Added in: v4.4.0 + ```javascript MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${record.lastName}`); @@ -326,7 +326,7 @@ MyTable.setComputedAttribute('fullName', (record) => `${record.firstName} ${reco Returns the number of records in the table. By default returns an approximate (fast) count. Pass `{ exactCount: true }` for a precise count. -Added in: v4.5.0 + ### `sourcedFrom(Resource, options?)` @@ -352,7 +352,7 @@ static parsePath(path) { Set this static property to `true` to map the full URL (including query string) as the primary key, bypassing query parsing. 
-Added in: v4.5.0 (documented in improved URL path parsing) + (documented in improved URL path parsing) ```javascript export class MyTable extends tables.MyTable { @@ -415,7 +415,7 @@ Product.search({ Product.search({ conditions: [{ attribute: ['brand', 'name'], value: 'Harper' }] }); ``` -Added in: v4.3.0 + ### `operator` @@ -511,7 +511,7 @@ return { status: 200, headers: { 'X-Custom-Header': 'value' }, data: { message: `body` must be a string, `Buffer`, Node.js stream, or `ReadableStream`. `data` is an object that will be serialized. -Added in: v4.4.0 + ### Throwing Errors diff --git a/reference_versioned_docs/version-v4/rest/overview.md b/reference_versioned_docs/version-v4/rest/overview.md index e38b2e4c..a2121f98 100644 --- a/reference_versioned_docs/version-v4/rest/overview.md +++ b/reference_versioned_docs/version-v4/rest/overview.md @@ -9,7 +9,7 @@ title: REST Overview # REST Overview -Added in: v4.2.0 + Harper provides a powerful, efficient, and standard-compliant HTTP REST interface for interacting with tables and other resources. The REST interface is the recommended interface for data access, querying, and manipulation over HTTP, providing the best performance and HTTP interoperability with different clients. @@ -47,7 +47,7 @@ The REST interface follows a consistent URL structure: | `/my-resource/record-id/` | Trailing slash — the collection of records with the given id prefix | | `/my-resource/record-id/with/multiple/parts` | Record id with multiple path segments | -Changed in: v4.5.0 — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. + — Resources can be defined with nested paths and accessed by exact path without a trailing slash. The `id.property` dot syntax for accessing properties via URL is only applied to properties declared in a schema. 
## HTTP Methods @@ -107,7 +107,7 @@ Content-Type: application/json Partially update a record, merging only the provided properties (CRDT-style update). Unspecified properties are preserved. -Added in: v4.3.0 + ```http PATCH /MyTable/123 @@ -140,7 +140,7 @@ See [Content Types](./content-types.md) for the full list of supported formats a ## OpenAPI -Added in: v4.3.0 + Harper automatically generates an OpenAPI specification for all resources exported via a schema. This endpoint is available at: diff --git a/reference_versioned_docs/version-v4/rest/querying.md b/reference_versioned_docs/version-v4/rest/querying.md index 37ad0b4d..b11f0ff4 100644 --- a/reference_versioned_docs/version-v4/rest/querying.md +++ b/reference_versioned_docs/version-v4/rest/querying.md @@ -27,7 +27,7 @@ GET /Product/?category=software&inStock=true ### Null Queries -Added in: v4.3.0 + Query for null values or non-null values: @@ -173,11 +173,11 @@ GET /Product/?rating=gt=3&sort(+name) GET /Product/?sort(+rating,-price) ``` -Added in: v4.3.0 + ## Relationships and Joins -Added in: v4.3.0 + Harper supports querying across related tables through dot-syntax chained attributes. Relationships must be defined in the schema using `@relation`. @@ -237,7 +237,7 @@ The array order of `resellerIds` is preserved when resolving the relationship. ## Property Access via URL -Changed in: v4.5.0 + Access a specific property of a record by appending it with dot syntax to the record id: @@ -249,7 +249,7 @@ This only works for properties declared in the schema. As of v4.5.0, dots in URL ## `directURLMapping` Option -Added in: v4.5.0 + Resources can be configured with `directURLMapping: true` for more direct URL path handling. When enabled, the URL path is mapped more directly to the resource without the default query parameter parsing semantics. See [Database / Schema](../database/schema.md) for configuration details. 
diff --git a/reference_versioned_docs/version-v4/rest/server-sent-events.md b/reference_versioned_docs/version-v4/rest/server-sent-events.md index bdffaa1f..7dc98a0e 100644 --- a/reference_versioned_docs/version-v4/rest/server-sent-events.md +++ b/reference_versioned_docs/version-v4/rest/server-sent-events.md @@ -7,7 +7,7 @@ title: Server-Sent Events # Server-Sent Events -Added in: v4.2.0 + Harper supports Server-Sent Events (SSE), a simple and efficient mechanism for browser-based applications to receive real-time updates from the server over a standard HTTP connection. SSE is a one-directional transport — the server pushes events to the client, and the client has no way to send messages back on the same connection. diff --git a/reference_versioned_docs/version-v4/rest/websockets.md b/reference_versioned_docs/version-v4/rest/websockets.md index 005b6795..2675e57e 100644 --- a/reference_versioned_docs/version-v4/rest/websockets.md +++ b/reference_versioned_docs/version-v4/rest/websockets.md @@ -8,7 +8,7 @@ title: WebSockets # WebSockets -Added in: v4.2.0 + Harper supports WebSocket connections through the REST interface, enabling real-time bidirectional communication with resources. WebSocket connections target a resource URL path — by default, connecting to a resource subscribes to changes for that resource. 
diff --git a/reference_versioned_docs/version-v4/security/certificate-management.md b/reference_versioned_docs/version-v4/security/certificate-management.md index 79f254b8..b7357e36 100644 --- a/reference_versioned_docs/version-v4/security/certificate-management.md +++ b/reference_versioned_docs/version-v4/security/certificate-management.md @@ -87,7 +87,7 @@ For full mTLS authentication details, see [mTLS Authentication](./mtls-authentic ## Certificate Verification -Added in: v4.5.0 (certificate revocation); v4.7.0 (OCSP support) + (certificate revocation); v4.7.0 (OCSP support) When using mTLS, enable certificate verification to ensure revoked certificates cannot authenticate even if still within their validity period: @@ -122,7 +122,7 @@ For full configuration options and troubleshooting, see [Certificate Verificatio ## Dynamic Certificate Management -Added in: v4.4.0 + Certificates — including CAs and private keys — can be dynamically managed without restarting Harper. diff --git a/reference_versioned_docs/version-v4/security/certificate-verification.md b/reference_versioned_docs/version-v4/security/certificate-verification.md index e2ee6ad2..2b00542f 100644 --- a/reference_versioned_docs/version-v4/security/certificate-verification.md +++ b/reference_versioned_docs/version-v4/security/certificate-verification.md @@ -7,9 +7,9 @@ title: Certificate Verification -Added in: v4.5.0 + -Changed in: v4.7.0 (OCSP support added) + (OCSP support added) Certificate verification (also called certificate revocation checking) ensures that revoked certificates cannot be used for mTLS authentication, even if they are otherwise valid and trusted. This is a critical security control for environments where certificates may need to be revoked before their expiration date — due to compromise, employee departure, or other security concerns. 
diff --git a/reference_versioned_docs/version-v4/security/configuration.md b/reference_versioned_docs/version-v4/security/configuration.md index 717bc180..e77aff2b 100644 --- a/reference_versioned_docs/version-v4/security/configuration.md +++ b/reference_versioned_docs/version-v4/security/configuration.md @@ -35,7 +35,7 @@ How long (in milliseconds) an authentication result — a particular `Authorizat _Type: boolean — Default: `true`_ -Added in: v4.2.0 + Enables cookie-based sessions to maintain an authenticated session across requests. This is the preferred authentication mechanism for web browsers: cookies hold the token securely without exposing it to JavaScript, reducing XSS vulnerability risk. @@ -55,7 +55,7 @@ How long a JWT refresh token remains valid before expiring. Accepts [`jsonwebtok _Type: string — Default: `sha256`_ -Added in: v4.5.0 + Password hashing algorithm used when storing user passwords. Replaced the previous MD5 hashing. Options: diff --git a/reference_versioned_docs/version-v4/security/mtls-authentication.md b/reference_versioned_docs/version-v4/security/mtls-authentication.md index d1f202e9..2e89dcbc 100644 --- a/reference_versioned_docs/version-v4/security/mtls-authentication.md +++ b/reference_versioned_docs/version-v4/security/mtls-authentication.md @@ -6,7 +6,7 @@ title: mTLS Authentication -Added in: v4.3.0 + Harper supports Mutual TLS (mTLS) authentication for incoming HTTP connections. When enabled, the client must present a certificate signed by a trusted Certificate Authority (CA). If the certificate is valid and trusted, the connection is authenticated using the user whose username matches the `CN` (Common Name) from the client certificate's `subject`. 
diff --git a/reference_versioned_docs/version-v4/static-files/overview.md b/reference_versioned_docs/version-v4/static-files/overview.md index 2d0ea9f5..7104e0c6 100644 --- a/reference_versioned_docs/version-v4/static-files/overview.md +++ b/reference_versioned_docs/version-v4/static-files/overview.md @@ -44,7 +44,7 @@ Files are accessed relative to the matched directory root, so `GET /index.html` ## `files` and `urlPath` Options -Added in: v4.5 + `static` is a [Plugin](../components/overview.md) and supports the standard `files` and `urlPath` configuration options for controlling which files to serve and at what URL path. @@ -62,7 +62,7 @@ See [Components Overview](../components/overview.md) for full `files` glob patte ## Additional Options -Added in: v4.7 + In addition to the standard `files`, `urlPath`, and `timeout` options, `static` supports these configuration options: @@ -76,7 +76,7 @@ In addition to the standard `files`, `urlPath`, and `timeout` options, `static` ## Auto-Updates -Added in: v4.7.0 + Because `static` uses the Plugin API, it automatically responds to changes without requiring a Harper restart. Adding, removing, or modifying files — or updating `config.yaml` — takes effect immediately. 
diff --git a/reference_versioned_docs/version-v4/users-and-roles/configuration.md b/reference_versioned_docs/version-v4/users-and-roles/configuration.md index 8177180b..37565b65 100644 --- a/reference_versioned_docs/version-v4/users-and-roles/configuration.md +++ b/reference_versioned_docs/version-v4/users-and-roles/configuration.md @@ -52,7 +52,7 @@ editor: ## Password Hashing -Added in: v4.5.0 + Harper supports two password hashing algorithms, replacing the previous MD5 hashing: diff --git a/scripts/replace-version-annotations.js b/scripts/replace-version-annotations.js new file mode 100644 index 00000000..fead988c --- /dev/null +++ b/scripts/replace-version-annotations.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +// Replaces plain-text "Added in: vX.Y.Z", "Changed in: vX.Y.Z", "Deprecated in: vX.Y.Z" +// standalone lines with JSX components in all v4 reference docs. + +const fs = require('fs'); +const path = require('path'); + +const dir = path.join(__dirname, '../reference'); + +const typeMap = { Added: 'added', Changed: 'changed', Deprecated: 'deprecated' }; + +function walk(d) { + return fs.readdirSync(d).flatMap((f) => { + const p = path.join(d, f); + return fs.statSync(p).isDirectory() ? walk(p) : p.endsWith('.md') ? [p] : []; + }); +} + +let changed = 0; + +for (const file of walk(dir)) { + let src = fs.readFileSync(file, 'utf8'); + + // Match standalone lines: "Added in: v4.3.0" or "Added in: v4.3.0 (some note)" + let out = src.replace(/^(Added|Changed|Deprecated) in: (v[\d]+\.[\d]+(?:\.[\d]+)?)(.*)?$/gm, (_, word, ver, rest) => { + const type = typeMap[word]; + const note = rest ? rest.trim() : ''; + const tag = + type === 'added' ? `` : ``; + return note ? `${tag} ${note}` : tag; + }); + + if (out !== src) { + fs.writeFileSync(file, out); + changed++; + console.log('updated:', path.relative(dir, file)); + } +} + +console.log(`\nDone. 
${changed} file(s) updated.`); diff --git a/src/components/VersionBadge.module.css b/src/components/VersionBadge.module.css new file mode 100644 index 00000000..6f748ecf --- /dev/null +++ b/src/components/VersionBadge.module.css @@ -0,0 +1,7 @@ +.badge { + display: inline-block; + font-size: 0.8rem; + color: var(--ifm-color-content-secondary); + font-style: italic; + margin-bottom: 0.75rem; +} diff --git a/src/components/VersionBadge.tsx b/src/components/VersionBadge.tsx new file mode 100644 index 00000000..c3ace2e0 --- /dev/null +++ b/src/components/VersionBadge.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import styles from './VersionBadge.module.css'; + +type VersionBadgeType = 'added' | 'changed' | 'deprecated'; + +const LABELS: Record = { + added: 'Added in', + changed: 'Changed in', + deprecated: 'Deprecated in', +}; + +interface VersionBadgeProps { + type?: VersionBadgeType; + version: string; +} + +export default function VersionBadge({ type = 'added', version }: VersionBadgeProps) { + return ( + + {LABELS[type]}: {version} + + ); +} diff --git a/src/theme/MDXComponents.tsx b/src/theme/MDXComponents.tsx new file mode 100644 index 00000000..7c722ec4 --- /dev/null +++ b/src/theme/MDXComponents.tsx @@ -0,0 +1,7 @@ +import MDXComponents from '@theme-original/MDXComponents'; +import VersionBadge from '@site/src/components/VersionBadge'; + +export default { + ...MDXComponents, + VersionBadge, +}; From 99da88ed39a173baa0a22081e4da29894de4ca58 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 09:30:04 -0600 Subject: [PATCH 46/51] update dev docs for new VersionBadge --- CONTRIBUTING.md | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f5cb1b22..96546a42 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -141,24 +141,34 @@ Deprecated or discouraged features belong in `reference/legacy/` (current) or `r ### Version Annotations -Because the 
Reference section consolidates all minor versions of a major into one document, features are annotated inline to indicate when they were introduced or changed. Follow the Node.js documentation convention: +Because the Reference section consolidates all minor versions of a major into one document, features are annotated inline to indicate when they were introduced or changed. + +Use the `` component. It is registered globally and requires no import in `.md` or `.mdx` files. + +```mdx + + + +``` + +The `type` prop defaults to `"added"`, so the most common case is just `version`. Place the badge on its own line directly below the heading it describes: **New feature:** -```markdown +```mdx ## Relationships -Added in: v4.3.0 + The `@relation` directive allows you to define relationships between tables... ``` **Changed behavior:** -```markdown +```mdx ### Default Port -Changed in: v4.5.0 + The default MQTT port changed from 9925 to 9933. In previous versions of v4, the default was 9925. @@ -166,28 +176,26 @@ In previous versions of v4, the default was 9925. **Deprecated feature:** -```markdown +```mdx ## SQL Querying -Deprecated in: v4.2.0 + SQL is still supported but discouraged. See [Database](../database/overview.md) for modern alternatives. ``` -**Configuration option:** - -```markdown -### `logger.level` +**Configuration option** (inline in a list): -- Type: `string` -- Default: `"info"` -- Added in: v4.1.0 +```mdx +- `logger.level` — Log level; _Default_: `"info"` (Added in: v4.1.0) ``` -If the introduction version is inferred from version comparison rather than confirmed by release notes, note it: +For inline config option annotations inside list items, plain text `(Added in: vX.Y.Z)` is fine — using the component mid-sentence is awkward. Reserve `` for standalone placement after headings. 
-```markdown -Added in: v4.3.0 (inferred from version comparison, needs verification) +If the introduction version is inferred rather than confirmed by release notes, append a note: + +```mdx + (inferred from version comparison, needs verification) ``` ## Known Issues From add5740afde38ef04f717ee06dd32773c1786e2a Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 09:36:27 -0600 Subject: [PATCH 47/51] fix config --- docusaurus.config.ts | 69 ++++++-------------------------------------- 1 file changed, 9 insertions(+), 60 deletions(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 8c8987a1..49695a13 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -83,17 +83,15 @@ const config: Config = { path: 'reference', routeBasePath: 'reference', sidebarPath: './sidebarsReference.ts', - // editUrl: ({ versionDocsDirPath, docPath }) => { - // // For versioned docs: versionDocsDirPath is like 'versioned_docs/version-4' - // // For current docs: versionDocsDirPath is 'docs' - // if (versionDocsDirPath.startsWith('versioned_docs')) { - // // Versioned docs are in versioned_docs/version-X.X/ - // return `https://github.com/HarperFast/documentation/blob/main/${versionDocsDirPath}/${docPath}`; - // } else { - // // Current docs are in the root docs/ directory - // return `https://github.com/HarperFast/documentation/blob/main/docs/${docPath}`; - // } - // }, + editUrl: ({ versionDocsDirPath, docPath }) => { + // For versioned reference docs: versionDocsDirPath is like 'reference_versioned_docs/version-v4' + // For current docs: versionDocsDirPath is 'reference' + if (versionDocsDirPath.startsWith('reference_versioned_docs')) { + return `https://github.com/HarperFast/documentation/blob/main/${versionDocsDirPath}/${docPath}`; + } else { + return `https://github.com/HarperFast/documentation/blob/main/reference/${docPath}`; + } + }, lastVersion: 'current', includeCurrentVersion: false, versions: { @@ -102,55 +100,6 @@ const config: Config = { 
}, // Converts npm commands in markdown code blocks to show npm/yarn/pnpm tabs remarkPlugins: [[require('@docusaurus/remark-plugin-npm2yarn'), { sync: true }]], - // Filter out index files that are used as category links - async sidebarItemsGenerator({ defaultSidebarItemsGenerator, ...args }) { - const sidebarItems = await defaultSidebarItemsGenerator(args); - - // Function to recursively process sidebar items - function filterIndexFiles(items: any[]): any[] { - return items - .filter((item) => { - // Filter out index.md files at the root of autogenerated directories - // when they would be duplicates of category links - if (item.type === 'doc' && item.id?.endsWith('/index')) { - // Check if the category metadata has a link to this index - const dirName = args.item?.dirName; - if (dirName && item.id === `${dirName}/index`) { - const categoryMeta = args.categoriesMetadata?.[dirName]; - if (categoryMeta?.link?.type === 'doc' && categoryMeta.link.id === item.id) { - const versionName = args.version?.versionName || 'current'; - console.log(`✂️ 🔗 [v${versionName}] Removing ${item.id} from ${dirName} (category link exists)`); - return false; - } - } - // Keep other index files that are in subcategories - return true; - } - - // Process categories recursively - if (item.type === 'category' && item.items) { - return { - ...item, - items: filterIndexFiles(item.items), - }; - } - - return true; - }) - .map((item) => { - // For categories, recursively filter their items - if (item.type === 'category' && item.items) { - return { - ...item, - items: filterIndexFiles(item.items), - }; - } - return item; - }); - } - - return filterIndexFiles(sidebarItems); - }, }, ], From 256de664cf4526bfc78c7a82d259db929f84845f Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 11:05:25 -0600 Subject: [PATCH 48/51] fix config to throw on broken links --- docusaurus.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.ts 
b/docusaurus.config.ts index 49695a13..2c686692 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -54,7 +54,7 @@ const config: Config = { organizationName: 'HarperFast', // Usually your GitHub org/user name. projectName: 'documentation', // Usually your repo name. - onBrokenLinks: 'warn', + onBrokenLinks: 'throw', plugins: [ [ From 7085d65a33f8eefb2b99f22235009b283a1212b4 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 11:25:13 -0600 Subject: [PATCH 49/51] update contrib and simplify readme --- CONTRIBUTING.md | 2 +- README.md | 81 +------------------------------------------------ 2 files changed, 2 insertions(+), 81 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 96546a42..2cf38f0c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ npm run dev 4. Format your code: ```bash -npm run format +npm run format:write ``` 5. Push changes to a branch and create a pull request diff --git a/README.md b/README.md index fa71a219..c400f503 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Harper Documentation -Documentation website for [Harper](https://harpersystems.dev), a fullstack, serverful Node.js application platform. +Documentation website for [Harper](https://harper.fast), a fullstack, serverful Node.js application platform. Powered by [Docusaurus](https://docusaurus.io/). @@ -11,82 +11,3 @@ This documentation site is open source! If you notice something out-of-place or have suggestions for improvement, please feel free to submit an issue and/or a pull request. Make sure to follow the relevant bug report and content/feature request templates. For more information on contributing, follow the [contribution guide](CONTRIBUTING.md). 
- -## 🚀 Quick Start - -```bash -# Install dependencies -npm install - -# Start development server -npm start -# Opens at http://localhost:3000 - -# Build for production -npm run build - -# Serve production build locally -npm run serve -``` - -## 📁 Directory Structure - -```text -├── docs/ # Main documentation content -├── static/ # Static assets -│ ├── img/ # Site images and logos (versioned) -│ └── js/ # JavaScript files -├── src/ # React components and custom pages -│ ├── css/ # Custom styles -│ └── pages/ # Custom pages -├── versioned_docs/ # Documentation for previous versions -├── versioned_sidebars/ # Sidebar configurations for versions -├── docusaurus.config.ts # Main Docusaurus configuration -├── sidebars.ts # Sidebar navigation structure -├── redirects.ts # URL redirects configuration -└── versions.json # Version configuration -``` - -## 🛠️ Development - -### Running Locally - -```bash -# Start the development server with hot reload -npm start - -# Clear cache if you encounter issues -npm run clear -``` - -The development server runs at `http://localhost:3000` and automatically reloads when you make changes. - -### Other Commands - -```bash -# Type checking -npm run typecheck - -# Format code -npm run format - -# Clean all generated files and caches -npm run clear -``` - -## 📋 Cutting a New Version - -When releasing a new version of Harper documentation: - -```bash -# Cut a new version (e.g., 4.7) -npm run version -``` - -This will: - -1. Copy current docs to versioned_docs/version-4.7 -2. Copy current sidebars to versioned_sidebars -3. Update versions.json - -After cutting a version, update `docusaurus.config.ts` to set the new `lastVersion`. 
From 77065b714d13f14a7b40095f79413855474790a3 Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 11:25:30 -0600 Subject: [PATCH 50/51] add ai generated retro for future reflection --- v4-docs-project-retrospective.md | 280 +++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 v4-docs-project-retrospective.md diff --git a/v4-docs-project-retrospective.md b/v4-docs-project-retrospective.md new file mode 100644 index 00000000..30850ab6 --- /dev/null +++ b/v4-docs-project-retrospective.md @@ -0,0 +1,280 @@ +# Harper v4 Documentation Rewrite — Project Retrospective + +**Date**: 2026-03-31 +**Branch**: `major-version-reorg` +**Duration**: ~6 weeks (2026-02-18 → 2026-03-31) + +--- + +## What We Set Out to Do + +The Harper v4 reference documentation had accumulated across seven minor version folders (`versioned_docs/version-4.1/` through `versioned_docs/version-4.7/`). Each minor version was a near-complete copy of the previous with additions — so any given page existed seven times, with subtle diffs that were nearly impossible to reason about together. On top of that, the content was organized by _user role_ ("Developers", "Administration", "Deployments") rather than by _feature_, which made individual capabilities like MQTT or Static Files genuinely hard to discover. + +The project had two simultaneous transformation goals: + +**Horizontal consolidation**: Merge seven versioned folders into a single `reference/v4/` document, using inline version annotations (Node.js documentation style) to record when features were added, changed, or deprecated across minor versions. + +**Vertical reorganization**: Restructure content from role-based groupings to a flat, feature-based hierarchy where each Harper built-in plugin or capability is a top-level section immediately visible in the sidebar. 
+ +An additional constraint: the old `/docs/` URL space had been live for years, with backlinks across the internet and real traffic measured in Google Analytics. Every page needed a redirect to its new location — and the redirect mapping needed to be driven by data, not guesswork. + +--- + +## Planning Phase (2026-02-17 → 2026-02-19) + +### The Research Foundation + +Before any migration tooling or structure was created, manual research was done to map the evolution of Harper features across all seven minor versions. This is documented in [v4-docs-research.md](./v4-docs-research.md), which walks through what changed at each version from v4.1 to v4.7. + +Notable findings from this research: + +- The role-based navigation (`Developers` / `Administration`) had silently broken in v4.4 when [PR #303](https://github.com/HarperFast/documentation/blob/ade07fd9428b0321c047ac8243ad1106bb0de2a8/versioned_sidebars/version-4.4-sidebars.json) restructured developer onboarding and removed the `developers/` tab from the sidebar — those paths existed but were invisible for ~4 months. +- The evolution of "Custom Functions → Components → Applications/Extensions → Plugins" was one of the trickiest naming threads to track, since AI-generated timelines kept getting confused by the naming history. +- Transaction logging and audit logging had historically lived under `logging/` but conceptually belonged in `database/` — this was one of several reorganization decisions made during research. + +An AI-generated feature history file ([v4-feature-history-ai-gen.md](./v4-feature-history-ai-gen.md)) was also produced but flagged explicitly as "use with caution" — AI struggled with the naming evolution and the research notes reflect that the human was better positioned to piece it together. 
+ +### The Plan Documents + +On **2026-02-18**, commit [`78eca4be`](https://github.com/HarperFast/documentation/commit/78eca4bed4630fd81f8f9328c7ed7e0e603a9589) created five planning documents in one shot (4,487 lines across 7 files): + +- **[v4-docs-project-brief.md](./v4-docs-project-brief.md)** — executive summary, status dashboard, key decisions log, team assignments +- **[v4-docs-reference-plan.md](./v4-docs-reference-plan.md)** — target structure philosophy, version annotation strategy, the full reference outline (directory tree), redirect philosophy +- **[v4-docs-migration-map.md](./v4-docs-migration-map.md)** — file-by-file mapping from old paths to new paths, with primary sources, additional sources, and merge requirements for each page +- **[v4-docs-implementation-plan.md](./v4-docs-implementation-plan.md)** — agent instructions, PR template, link placeholder format, section ordering +- **[v4-docs-research.md](./v4-docs-research.md)** — manual research notes (pre-existing, also committed here) + +Key architectural decisions made during planning: + +1. **Feature-first organization**: Stop grouping by "Developers" / "Administration". Make every Harper capability (CLI, MQTT, Static Files, Components, etc.) a top-level sidebar section. This mirrors how Stripe, Node.js, and other API docs are organized, and more accurately maps to how Harper is actually built — around plugins and features. + +2. **`overview.md` instead of `index.md`**: Following the Learn section pattern, reference sections use non-collapsible sidebar headers with an explicit `overview.md` at the top. No hidden index pages. + +3. **Primary vs. secondary reference pattern**: For features that span multiple sections (like Operations APIs), there's one exhaustive primary reference that other sections link to with only quick-reference summaries. Prevents duplication while maintaining discoverability. + +4. 
**Inline version annotations**: Node.js-style annotations (`Added in: v4.3.0`) placed inline in the content, not in YAML frontmatter. Confidence levels were required — agents had to distinguish `(confirmed via release notes)` from `(inferred from version comparison, needs verification)`. + +5. **`TODO:path` link placeholders**: Since 20 sections were being written in parallel across PRs, cross-section links couldn't be real until after all sections existed. The format `[Text](TODO:reference_versioned_docs/version-v4/section/page.md 'description')` was chosen for easy grep/replace in a later cleanup pass. + +6. **AI-first, human-review workflow**: AI agents (Claude Code in VSCode) do initial content generation from the source files; humans review, edit, and merge. Not fully automated — visibility and quality control were prioritized over speed. + +7. **Target directory**: Content goes to `reference_versioned_docs/version-v4/` first (not `reference/`), with a later copy step to `reference/` to kickstart v5. This kept v5 concerns out of scope. + +On **2026-02-19**, commit [`241f8cbe`](https://github.com/HarperFast/documentation/commit/241f8cbeab330140999a045c5db6e3b4eadf08d8) configured the build system for the migration branch: + +- Temporarily disabled the local search plugin +- Set `onBrokenLinks: 'warn'` (would throw in production; needed to allow incremental builds during migration) +- Added redirect page infrastructure +- The site now built successfully, ready for migration PRs + +Also in this commit: `scripts/harper-docs-analytics.csv` — 1,635 rows of Google Analytics pageview data (Oct 2025 – Feb 2026) that would later drive the redirect priority decisions. + +A `scripts/analyze-pageview-data.mjs` script was also created to process the CSV and surface the top-trafficked paths. + +--- + +## Content Migration Phase (2026-02-23 → 2026-03-27) + +The migration was structured into five phases based on complexity. 
Each section was a separate PR merged into `major-version-reorg`, with Claude Code generating initial content from the source versioned files. + +### Phase 1A — Simple, Stable Sections + +| Section | PR Merged | Commit | +| ---------------- | ---------- | --------------------------------------------------------------------------------------------------------- | +| CLI | 2026-02-23 | [`021d8000`](https://github.com/HarperFast/documentation/commit/021d80004f8a3b8be9d2be9faecbc33ca583e30d) | +| GraphQL Querying | 2026-02-24 | [`af96a726`](https://github.com/HarperFast/documentation/commit/af96a726203b35952583bd3eba6e226c419cb7a5) | +| Studio | 2026-02-24 | [`2c599700`](https://github.com/HarperFast/documentation/commit/2c599700eb40ab9ea9c91587e270026018515fc2) | +| Fastify Routes | 2026-02-24 | [`c6c99e5f`](https://github.com/HarperFast/documentation/commit/c6c99e5f6a94901bae80bdc98524bce7fd82dbce) | + +### Phase 1B — Medium Complexity + +| Section | PR Merged | Commit | +| --------------------- | ---------- | --------------------------------------------------------------------------------------------------------- | +| Environment Variables | 2026-02-25 | [`cd47bee3`](https://github.com/HarperFast/documentation/commit/cd47bee3d2bc5e48c4fe88d5b7f56bb9a5b1c20f) | +| HTTP | 2026-02-26 | [`fa4d2f38`](https://github.com/HarperFast/documentation/commit/fa4d2f38db2c6668dc336700375f2528ee36477b) | +| Static Files | 2026-03-02 | [`2d5d2939`](https://github.com/HarperFast/documentation/commit/2d5d2939003f612ff3f773c14b68fd0a5b217fc6) | +| Logging | 2026-03-04 | [`5271417c`](https://github.com/HarperFast/documentation/commit/5271417cb87021a21f078584b95d729e2d37aad9) | +| Analytics | 2026-03-10 | [`5fa17671`](https://github.com/HarperFast/documentation/commit/5fa176712840179889e16adb64e2cfe2c4deade7) | +| MQTT | 2026-03-11 | [`e46a359f`](https://github.com/HarperFast/documentation/commit/e46a359f2b0b6d9d9e08a80bfe84dadf19e80d95) | + +### Phase 1C — Complex Sections + +| Section | 
PR Merged | Commit | +| ------------------------ | ---------- | --------------------------------------------------------------------------------------------------------- | +| Security + Users & Roles | 2026-03-17 | [`37580219`](https://github.com/HarperFast/documentation/commit/3758021962bc06ccd8e4ebaef5aea4cd4e7173a2) | +| REST | 2026-03-18 | [`ac8b9c90`](https://github.com/HarperFast/documentation/commit/ac8b9c90fb32e48a2e3eec05e86831d9cb3e0ebe) | +| Database | 2026-03-26 | [`3508aabc`](https://github.com/HarperFast/documentation/commit/3508aabcf6da255b696100710d2f1e68ccea02c0) | +| Resources | 2026-03-26 | [`625fa2b6`](https://github.com/HarperFast/documentation/commit/625fa2b615e6079bf4b082100c10b2bdedd67174) | +| Components | 2026-03-27 | [`7359fcbb`](https://github.com/HarperFast/documentation/commit/7359fcbb9c1b1d5d24ef0b65f0f1b1be8d7e1963) | +| Replication | 2026-03-27 | [`ef09307e`](https://github.com/HarperFast/documentation/commit/ef09307e382a49b743aefee3a4ec0caa23665033) | + +### Phase 1D — Cross-Cutting Sections + +| Section | PR Merged | Commit | +| -------------- | ---------- | --------------------------------------------------------------------------------------------------------- | +| Operations API | 2026-03-27 | [`4f7fc1e0`](https://github.com/HarperFast/documentation/commit/4f7fc1e03eb6dd99cff69c28fc4f8117afac67c4) | +| Configuration | 2026-03-27 | [`ffc57e0d`](https://github.com/HarperFast/documentation/commit/ffc57e0d2bdf4b811d951d1a6015486433727549) | + +### Phase 1E — Legacy Content + +Added during migration in commit [`92ef6d5b`](https://github.com/HarperFast/documentation/commit/92ef6d5bc29c4b387261f6ab1fc6f6152d2dacb8): + +- `legacy/cloud.md` — Harper Cloud landing page directing to Fabric +- `legacy/custom-functions.md` — what Custom Functions were; points to Components +- `database/sql.md` — SQL is documented content, not just a deprecation notice, so it got a real page rather than a legacy stub + +### Adaptations from the Original Plan 
+ +Several sections evolved during migration: + +- **Security split**: RBAC content was broken out from `security/` into its own top-level `users-and-roles/` section. The breadth of content (operations API, config file roles, permission structure) warranted its own section. +- **HTTP TLS page**: `http/tls.md` was added during migration — TLS config warranted its own page beyond what the plan specified. +- **Components JS environment**: `components/javascript-environment.md` was added to capture JS globals (server, logger, etc.) that didn't fit cleanly elsewhere. +- **Environment Variables no config page**: `environment-variables/configuration.md` was not created — the content was ported directly into `configuration/overview.md` instead. +- **Database API page**: `database/api.md` was added for JS globals (`tables`, `databases`, `transaction()`, `createBlob()`) that didn't have a clear home in the original plan. +- **Resources global APIs not created**: `resources/global-apis.md` was skipped because that content was covered by `components/javascript-environment.md`. + +--- + +## Link Resolution Phase (2026-03-30) + +Once all 20 sections were merged, all `TODO:path` placeholders were resolved in a single PR: + +- **Link Resolution PR #467** — commit [`dd8fc4fe`](https://github.com/HarperFast/documentation/commit/dd8fc4feddf047dcaceadacc0a8043c54cca62ae) + +This was done section-by-section, resolving placeholders by scanning the actual files that now existed and replacing `TODO:path` strings with real relative paths. The per-section tracker files in `migration-context/link-placeholders/` were deleted after the PR merged. + +**Cross-reference updates PR #468** — commit [`13e1f53b`](https://github.com/HarperFast/documentation/commit/13e1f53bb59bc49553bbf42ad7b8e7bd4f50cb36) updated old `/docs/` links in both release notes and learn guides to point to the new `/reference/v4/` paths. 
+ +--- + +## Redirect Strategy (2026-03-30) + +### The Input Data + +The redirect work was driven by two inputs: + +1. **Google Analytics CSV** (`scripts/harper-docs-analytics.csv`, committed in [`fb672f4b`](https://github.com/HarperFast/documentation/commit/fb672f4bec334e05e64b1da87a1f466b8d8aff27)) — 1,635 rows of pageview data from October 2025 through February 2026. This gave traffic volumes for every old path. + +2. **`scripts/analyze-pageview-data.mjs`** — a script to process the CSV and rank paths by visit count, committed alongside the analytics data. + +The planning for redirects was documented in [memory/part5-redirects.md](./memory/part5-redirects.md), which contains: + +- The full new URL structure (`/reference/v4/[section]/[page]`) +- Annotated list of every old path with visit counts (50+ views) and their mapped targets +- Paths explicitly identified as needing no new redirect (install guides, `/learn/`, `/fabric/`) +- Notes on the old `redirects.ts` issues (stale `withBase()` abstraction, very old HarperDB-era rules) + +### The Tier System + +Redirects were prioritized by traffic volume: + +- **High priority (>200 views)**: 17 paths — explicit per-path redirects with comments +- **Medium priority (50–200 views)**: ~40 paths — explicit redirects +- **Low traffic (<50 views)**: Catch-all patterns rather than individual rules +- **Versioned paths (`/docs/4.X/...`)**: Low traffic across the board — catch-all redirect to `/reference/v4/` + +Notable redirect decisions: + +- `/docs/` root (2,854 views) → `/` (site root) +- `/docs/developers/applications/caching` (410 views) → `/reference/v4/resources/overview` (with a comment noting this should eventually point to a dedicated Learn guide) +- `/docs/reference/globals` (277 views) → `/reference/v4/components/javascript-environment` (the globals page became the JS environment page) + +### The Output + +Commit 
[`5e84ecf0`](https://github.com/HarperFast/documentation/commit/5e84ecf03a583129c5b752e79369b94d5c4d4691) (2026-03-30) — "finish redirects": + +- **`redirects.ts`** — rewritten (469 lines, net +153): non-versioned `/docs/*` paths → new `/reference/v4/` paths +- **`historic-redirects.ts`** — new file (1,811 lines): versioned `/docs/4.X/*` paths → new paths +- **`scripts/pageview-data-test.js`** — 215-line test script to validate redirect coverage against the analytics data +- **`CONTRIBUTING.md`** — added notes on the `docusaurus serve` bug with `4.X` paths (the `serve-handler` bug that treats `4.6` as a file extension) and the patch procedure + +--- + +## Old Content Deletion and Final Wiring (2026-03-30) + +Three commits on the same day completed the transition: + +1. **[`99bf4d81`](https://github.com/HarperFast/documentation/commit/99bf4d819d64604c8ebbda49153ca147f29ac96c)** — "checkpoint before deleting old content files" — final snapshot before deletion + +2. **[`48764459`](https://github.com/HarperFast/documentation/commit/487644598ddf55344c3c1c0e908ebabeeb4c84b4)** — "delete old docs content" — removed the entire `docs/` tree: `docs/administration/`, `docs/deployments/`, `docs/developers/`, `docs/reference/`, etc. (~34 files, thousands of lines) + +3. 
**[`0ebea43a`](https://github.com/HarperFast/documentation/commit/0ebea43acfc82215ff5d44d14ee8d40922bf4f63)** — "copy new content to reference/" — copied the finalized content from `reference_versioned_docs/version-v4/` into `reference/` to serve as the v5 starting point (as planned from the beginning) + +Additional cleanup on the same day: + +- [`7c62241a`](https://github.com/HarperFast/documentation/commit/7c62241afc25a184a1a0c7a82adc0c7acec12272) — removed paginator from reference section +- [`9aee72d7`](https://github.com/HarperFast/documentation/commit/9aee72d714cb458c7622f4b9f8bfa9f5cf67251a) — format pass +- [`256de664`](https://github.com/HarperFast/documentation/commit/256de664cf4526bfc78c7a82d259db929f84845f) — re-enabled `onBrokenLinks: 'throw'` (the temporary `'warn'` setting from the planning phase was finally reverted) + +--- + +## What the Final Structure Looks Like + +The new reference lives at `/reference/v4/` with 20 top-level sections, each with an `overview.md` and additional pages: + +``` +reference/v4/ +├── analytics/ (overview, operations) +├── cli/ (overview, commands, authentication, operations-api-commands) +├── components/ (overview, applications, extension-api, javascript-environment, plugin-api) +├── configuration/ (overview, options, operations) +├── database/ (overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql) +├── environment-variables/ (overview) +├── fastify-routes/ (overview) +├── graphql-querying/ (overview) +├── http/ (overview, configuration, api, tls) +├── legacy/ (cloud, custom-functions) +├── logging/ (overview, configuration, api, operations) +├── mqtt/ (overview, configuration) +├── operations-api/ (overview, operations) +├── replication/ (overview, clustering, sharding) +├── resources/ (overview, resource-api, query-optimization) +├── rest/ (overview, querying, headers, content-types, websockets, server-sent-events) +├── security/ (overview, basic-authentication, 
jwt-authentication, mtls-authentication, certificate-management, certificate-verification, configuration, api)
+├── static-files/ (overview)
+├── studio/ (overview)
+└── users-and-roles/ (overview, configuration, operations)
+```
+
+The `versioned_docs/version-4.X/` folders were removed. The seven-version structure is gone. Version history for any feature is now expressed inline within the single v4 reference using version annotations.
+
+---
+
+## Notable Technical Decisions and Tradeoffs
+
+### Why AI Agents, Not Pure Automation
+
+The original plan considered building an Agent SDK pipeline to fully automate migrations. The decision was made to use Claude Code in VSCode instead — providing visibility at each step and allowing human intervention on each PR. The project brief ([v4-docs-project-brief.md](./v4-docs-project-brief.md#key-decisions-log)) explicitly called out: "Provides visibility and control; can pivot to automation if needed."
+
+In practice, this meant each section still had a "manual review" commit before merging. Examples: [`253d3aae`](https://github.com/HarperFast/documentation/commit/253d3aae), [`55432eaa`](https://github.com/HarperFast/documentation/commit/55432eaa), [`c7286b58`](https://github.com/HarperFast/documentation/commit/c7286b58).
+
+### The `--fixup` Commit Strategy
+
+Migration branches used `git commit --fixup <sha>` for corrections to keep the development history clean while allowing easy squashing. This is described in the implementation plan and visible in the commit history of individual migration branches before squash-merge.
+
+### Preview Deployments for the Migration Branch
+
+Commit [`296064fd`](https://github.com/HarperFast/documentation/commit/296064fd05761486bc2861ccc6cae12a68ca6190) updated GitHub Actions workflows to generate preview deployments for PRs against `major-version-reorg` (not just PRs against `main`). This allowed reviewing each section in the rendered site before merging.
+ +### The `docusaurus serve` / `4.X` Path Bug + +While testing the final historic redirects, a bug was discovered in `serve-handler` (a Vercel library used by `docusaurus serve`) where directory paths ending in a number like `4.6` are treated as files rather than directories (because `4.6` looks like a file extension). This caused 404s on all `/docs/4.X/` paths locally. The fix required patching `node_modules/serve-handler/src/index.js` — the patch instructions were added to `CONTRIBUTING.md` in commit [`5e84ecf0`](https://github.com/HarperFast/documentation/commit/5e84ecf03a583129c5b752e79369b94d5c4d4691). An upstream PR was filed at https://github.com/vercel/serve-handler/pull/230. + +### Redirect Testing with Real Analytics Data + +Rather than manually checking redirects, a `scripts/pageview-data-test.js` script (215 lines, added in the "finish redirects" commit) validates redirect coverage against the actual analytics CSV. This makes the redirect file auditable — you can run the test and see which high-traffic paths have explicit redirects vs. fallthrough. 
+ +--- + +## By the Numbers + +- **Duration**: ~6 weeks (Feb 18 – Mar 31, 2026) +- **Sections migrated**: 20 +- **PRs merged (migration)**: ~20 section PRs + link resolution + cross-references +- **Source version folders eliminated**: 7 (`version-4.1` through `version-4.7`) +- **Old `docs/` files deleted**: ~100+ files across administration, deployments, developers, and reference subdirectories +- **New reference pages created**: ~60+ files +- **Redirects configured**: ~150+ explicit rules in `redirects.ts` + 1,811 lines of versioned path rules in `historic-redirects.ts` +- **Analytics paths analyzed**: 1,635 rows of pageview data used to prioritize redirect targets +- **Planning documents written**: 5 documents (~4,500 lines) before a single migration PR was opened From 918f82c4727916b960767b99352ca50e156b916a Mon Sep 17 00:00:00 2001 From: Ethan Arrowood Date: Tue, 31 Mar 2026 11:28:08 -0600 Subject: [PATCH 51/51] remove planning documents --- memory/MEMORY.md | 77 -- memory/part5-redirects.md | 221 ------ migration-context/link-placeholders/todo | 0 v4-docs-implementation-plan.md | 634 --------------- v4-docs-migration-map.md | 961 ----------------------- v4-docs-project-brief.md | 419 ---------- v4-docs-project-retrospective.md | 280 ------- v4-docs-reference-plan.md | 379 --------- v4-docs-research.md | 328 -------- 9 files changed, 3299 deletions(-) delete mode 100644 memory/MEMORY.md delete mode 100644 memory/part5-redirects.md delete mode 100644 migration-context/link-placeholders/todo delete mode 100644 v4-docs-implementation-plan.md delete mode 100644 v4-docs-migration-map.md delete mode 100644 v4-docs-project-brief.md delete mode 100644 v4-docs-project-retrospective.md delete mode 100644 v4-docs-reference-plan.md delete mode 100644 v4-docs-research.md diff --git a/memory/MEMORY.md b/memory/MEMORY.md deleted file mode 100644 index 76043bf2..00000000 --- a/memory/MEMORY.md +++ /dev/null @@ -1,77 +0,0 @@ -# Documentation Migration Memory - -## Project 
Overview - -Harper v4 docs migration: consolidating `versioned_docs/version-4.X/` → `reference_versioned_docs/version-v4/` with feature-based reorganization. - -- **Working branch**: `major-version-reorg` (all migration PRs target this branch) -- **Target dir**: `reference_versioned_docs/version-v4/` -- **Do NOT touch**: `versioned_docs/` or `reference/` - -## Key Files - -- `v4-docs-implementation-plan.md` — Agent instructions (follow Part 1 closely) -- `v4-docs-migration-map.md` — Authoritative source-to-target mapping per section -- `v4-docs-reference-plan.md` — Structure philosophy and outline -- `reference_versioned_sidebars/version-v4-sidebars.json` — Sidebar to update for each section -- ~~`migration-context/link-placeholders/`~~ — **Deleted** (Part 3 link resolution complete) - -## Release Notes Location - -`release-notes/v4-tucker/4.X.0.md` (NOT `release_notes/`) - -## Completed Sections - -All Phase 1A–1D sections are complete and merged: - -- CLI, GraphQL Querying, Studio, Fastify Routes (Phase 1A) -- Environment Variables, Static Files, HTTP, MQTT, Logging, Analytics (Phase 1B) -- Security, Users & Roles, REST (PR #457), Database (PR #458), Resources (PR #459), Components (PR #460), Replication (PR #461) (Phase 1C) -- Operations API (PR #462), Configuration (PR #463) (Phase 1D) - -## Key Decisions / Learnings - -- Each section gets its own branch `migration/[section-name]` off `major-version-reorg` (for phase 1 content generation) -- PRs are draft by default, opened against `major-version-reorg` -- `@relationship` in v4.7 source (not `@relation` from 4.3 release notes) — needs human verification -- Audit log required for real-time messaging (MQTT/WebSocket) — verify still true -- `schema.md` kept unified (overview + blobs + vectors); consider splitting if too long -- System tables include: `hdb_raw_analytics`, `hdb_analytics`, `hdb_dataloader_hash`, `hdb_nodes`, `hdb_certificate` -- Analytics detail lives in `analytics/overview.md`, not 
`database/system-tables.md` -- Components section added `javascript-environment.md` (not in original plan) - -## Next Steps - -**Part 3 (Link Resolution) — Complete** on `link-resolution` branch. Merged to `major-version-reorg`. - -**Part 4 (Cross-Reference Updates) — Complete** on `cross-reference-updates` branch (2 commits). - -- URL prefix confirmed: `/reference/v4/` (not `/docs/v4/`) -- Updated 8 release note files (4.1.0–4.6.0 + index.mdx) and 3 learn guides -- Left `/docs/administration/cloning` as-is in 4.2.0.md (no learn guide exists yet) - -**Part 5 (Redirects)** — Complete. `redirects.ts` rewritten from analytics data, plugin uncommented in `docusaurus.config.ts`. See `memory/part5-redirects.md` for details. Needs build verification + review of LOW TRAFFIC (<10 view) paths. - -### Part 3 Key Decisions - -- Operations table category links (e.g. `../operations-api/database.md`) → `../operations-api/operations.md` with section anchors (no sub-pages exist) -- `resources/global-apis.md` never created → links redirected to `../components/javascript-environment.md` -- SQL operations link → `../database/sql.md` (SQL moved from legacy per migration map) -- `[Applications](TODO:applications/overview.md)` → `../components/overview.md` -- Malformed `[TODO:path](TODO:path)` links in `components/overview.md` fixed with proper text - -Legacy section: single files only (no subfolders): `cloud.md`, `custom-functions.md`. SQL moved to `database/sql.md`. - -## Sidebar Pattern - -```json -{ - "type": "category", - "label": "Section Name", - "collapsible": false, - "className": "learn-category-header", - "items": [{ "type": "doc", "id": "section/page", "label": "Label" }] -} -``` - -Insert new sections before the Legacy category at the bottom of the sidebar. 
diff --git a/memory/part5-redirects.md b/memory/part5-redirects.md deleted file mode 100644 index 70f63a01..00000000 --- a/memory/part5-redirects.md +++ /dev/null @@ -1,221 +0,0 @@ -# Part 5: Redirects Work - -## Status: Implementation complete — needs build verification + human review of LOW TRAFFIC items - -## Overview - -Rewriting `redirects.ts` to handle migrations from old `/docs/` paths to new `/reference/v4/` paths. -The redirect plugin is currently commented out in `docusaurus.config.ts` (lines 218-225). - -**Key constraint:** No redirects needed for the new `/reference/` section itself. The `/learn/`, `/release-notes/`, and `/fabric/` sections need very few redirects (flag exceptions). - -## The New URL Structure - -New reference paths live at `/reference/v4/[section]/[page]`: - -| Section | Key Pages | -| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | -| analytics | overview, operations | -| cli | overview, commands, authentication, operations-api-commands | -| components | overview, applications, extension-api, javascript-environment, plugin-api | -| configuration | overview, options, operations | -| database | overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql | -| environment-variables | overview | -| fastify-routes | overview | -| graphql-querying | overview | -| http | overview, configuration, api, tls | -| legacy | cloud, custom-functions | -| logging | overview, configuration, api, operations | -| mqtt | overview, configuration | -| operations-api | overview, operations | -| replication | overview, clustering, sharding | -| resources | overview, resource-api, query-optimization | -| rest | overview, querying, headers, content-types, websockets, server-sent-events | -| security | overview, basic-authentication, jwt-authentication, mtls-authentication, certificate-management, 
certificate-verification, configuration, api | -| static-files | overview | -| studio | overview | -| users-and-roles | overview, configuration, operations | - -## Old Path Structure (v4.7) - -The old docs were at `/docs/` serving the latest (4.7) content: - -- `/docs/developers/applications/*` → Components (new path) -- `/docs/developers/operations-api/*` → Operations API + various sections -- `/docs/developers/security/*` → Security -- `/docs/developers/replication/*` → Replication -- `/docs/developers/real-time` → REST (websockets/SSE) -- `/docs/developers/rest` → REST -- `/docs/developers/clustering/*` → Replication/clustering -- `/docs/developers/components/*` → (old reference/components - different from apps) -- `/docs/deployments/configuration` → Configuration -- `/docs/deployments/harper-cli` → CLI -- `/docs/deployments/install-harper/*` → (install - no new reference page) -- `/docs/deployments/harper-cloud/*` → Legacy/cloud -- `/docs/deployments/upgrade-hdb-instance` → (no direct equivalent in new ref) -- `/docs/administration/harper-studio/*` → Studio -- `/docs/administration/logging/*` → Logging -- `/docs/administration/cloning` → Replication -- `/docs/administration/compact` → Database/compaction -- `/docs/administration/jobs` → Database/jobs -- `/docs/reference/*` → Old reference section (reference/analytics, reference/resources/\*, etc.) 
-- `/docs/foundations/*` → learn/ (already handled) -- `/docs/getting-started/*` → learn/ (already handled) - -## Analytics: Top Paths Requiring New Redirects (views > 50) - -Paths from GA data (Oct 2025 – Feb 2026) that need redirects to `/reference/v4/`: - -### High Priority (>200 views) - -- `/docs/developers/operations-api` (1028) → `/reference/v4/operations-api/overview` -- `/docs/developers/applications` (727) → `/reference/v4/components/overview` -- `/docs/reference/resources` (667) → `/reference/v4/resources/overview` -- `/docs/deployments/configuration` (608) → `/reference/v4/configuration/overview` -- `/docs/developers/rest` (547) → `/reference/v4/rest/overview` -- `/docs/deployments/harper-cli` (467) → `/reference/v4/cli/overview` -- `/docs/reference` (459) → `/reference/v4` (index) -- `/docs/developers/applications/defining-schemas` (455) → `/reference/v4/database/schema` -- `/docs/developers/operations-api/nosql-operations` (435) → `/reference/v4/operations-api/operations` -- `/docs/developers/applications/caching` (410) → `/reference/v4/resources/overview` (or resource-api) -- `/docs/developers/real-time` (407) → `/reference/v4/rest/websockets` (or rest/overview) -- `/docs/developers/operations-api/databases-and-tables` (385) → `/reference/v4/database/overview` -- `/docs/developers/operations-api/components` (356) → `/reference/v4/operations-api/operations` -- `/docs/deployments/install-harper` (343) → keep as-is (deploy content, not in new ref) -- `/docs/developers/replication` (328) → `/reference/v4/replication/overview` -- `/docs/developers/operations-api/advanced-json-sql-examples` (158) → `/reference/v4/operations-api/operations` -- `/docs/developers/operations-api/bulk-operations` (158) → `/reference/v4/operations-api/operations` - -### Medium Priority (50–200 views) - -- `/docs/developers/applications/data-loader` (218) → `/reference/v4/database/data-loader` -- `/docs/developers/operations-api/system-operations` (213) → 
`/reference/v4/operations-api/operations` -- `/docs/reference/components/built-in-extensions` (204) → `/reference/v4/components/extension-api` -- `/docs/developers/operations-api/configuration` (203) → `/reference/v4/configuration/operations` -- `/docs/developers/applications/web-applications` (199) → `/reference/v4/components/applications` -- `/docs/developers/operations-api/users-and-roles` (195) → `/reference/v4/users-and-roles/operations` -- `/docs/developers/security` (183) → `/reference/v4/security/overview` -- `/docs/reference/resources/instance-binding` (181) → `/reference/v4/resources/resource-api` -- `/docs/developers/applications/debugging` (150) → `/reference/v4/components/overview` -- `/docs/reference/components/plugins` (150) → `/reference/v4/components/plugin-api` -- `/docs/developers/applications/define-routes` (144) → `/reference/v4/fastify-routes/overview` -- `/docs/reference/analytics` (135) → `/reference/v4/analytics/overview` -- `/docs/developers/replication/sharding` (133) → `/reference/v4/replication/sharding` -- `/docs/developers/operations-api/logs` (132) → `/reference/v4/logging/operations` -- `/docs/reference/dynamic-schema` (132) → `/reference/v4/database/schema` -- `/docs/administration/harper-studio` (130) → `/reference/v4/studio/overview` -- `/docs/reference/graphql` (109) → `/reference/v4/graphql-querying/overview` -- `/docs/reference/resources/migration` (109) → `/reference/v4/database/data-loader` -- `/docs/reference/data-types` (107) → `/reference/v4/database/schema` -- `/docs/reference/architecture` (105) → `/reference/v4` (no direct equiv - use index) -- `/docs/developers/operations-api/clustering-nats` (80) → `/reference/v4/replication/clustering` -- `/docs/developers/operations-api/token-authentication` (79) → `/reference/v4/security/jwt-authentication` -- `/docs/reference/transactions` (79) → `/reference/v4/database/transaction` -- `/docs/reference/limits` (78) → `/reference/v4/database/schema` (or overview) -- 
`/docs/developers/security/jwt-auth` (77) → `/reference/v4/security/jwt-authentication` -- `/docs/developers/security/certificate-management` (76) → `/reference/v4/security/certificate-management` -- `/docs/reference/blob` (76) → `/reference/v4/database/schema` -- `/docs/reference/components/configuration` (74) → `/reference/v4/components/overview` -- `/docs/developers/security/configuration` (98) → `/reference/v4/security/configuration` -- `/docs/developers/security/users-and-roles` (93) → `/reference/v4/users-and-roles/overview` -- `/docs/administration/cloning` (87) → `/reference/v4/replication/overview` -- `/docs/developers/operations-api/certificate-management` (114) → `/reference/v4/security/certificate-management` -- `/docs/developers/operations-api/custom-functions` (113) → `/reference/v4/legacy/custom-functions` -- `/docs/developers/operations-api/jobs` (113) → `/reference/v4/database/jobs` -- `/docs/developers/security/basic-auth` (83) → `/reference/v4/security/basic-authentication` -- `/docs/reference/globals` (277) → `/reference/v4/components/javascript-environment` -- `/docs/reference/components` (159) → `/reference/v4/components/overview` -- `/docs/reference/components/extensions` (102) → `/reference/v4/components/extension-api` -- `/docs/reference/components/applications` (121) → `/reference/v4/components/applications` -- `/docs/developers/applications/defining-roles` (119) → `/reference/v4/users-and-roles/overview` -- `/docs/developers/operations-api/sql-operations` (96) → `/reference/v4/database/sql` -- `/docs/administration/logging/standard-logging` (91) → `/reference/v4/logging/overview` -- `/docs/administration/logging` (68) → `/reference/v4/logging/overview` -- `/docs/reference/roles` (62) → `/reference/v4/users-and-roles/overview` -- `/docs/reference/storage-algorithm` (61) → `/reference/v4/database/storage-algorithm` -- `/docs/developers/sql-guide` (53) → `/reference/v4/database/sql` -- `/docs/developers/operations-api/registration` (59) → 
`/reference/v4/operations-api/operations` -- `/docs/administration/compact` (56) → `/reference/v4/database/compaction` -- `/docs/reference/resources/query-optimization` (55) → `/reference/v4/resources/query-optimization` -- `/docs/administration/jobs` (54) → `/reference/v4/database/jobs` -- `/docs/developers/operations-api/analytics` (145) → `/reference/v4/analytics/operations` -- `/docs/developers/operations-api/quickstart-examples` (145) → `/reference/v4/operations-api/operations` -- `/docs/reference/content-types` (70) → `/reference/v4/rest/content-types` -- `/docs/reference/headers` (46) → `/reference/v4/rest/headers` -- `/docs/developers/security/certificate-verification` (46) → `/reference/v4/security/certificate-verification` -- `/docs/administration/logging/audit-logging` (72) → `/reference/v4/logging/overview` -- `/docs/developers/clustering` (72) → `/reference/v4/replication/clustering` -- `/docs/administration/logging/transaction-logging` (45) → `/reference/v4/logging/overview` -- `/docs/reference/clustering` (31) → `/reference/v4/replication/clustering` -- `/docs/reference/clustering/enabling-clustering` (25) → `/reference/v4/replication/clustering` -- `/docs/reference/clustering/establishing-routes` (20) → `/reference/v4/replication/clustering` -- `/docs/reference/clustering/subscription-overview` (19) → `/reference/v4/replication/clustering` -- `/docs/reference/sql-guide` (26) → `/reference/v4/database/sql` -- `/docs/reference/sql-guide/json-search` (23) → `/reference/v4/database/sql` -- `/docs/developers/security/mtls-auth` (32) → `/reference/v4/security/mtls-authentication` -- `/docs/developers/components/built-in` (26) → `/reference/v4/components/extension-api` -- `/docs/developers/components/reference` (25) → `/reference/v4/components/extension-api` -- `/docs/developers/components` (33) → `/reference/v4/components/overview` -- `/docs/administration/harper-studio/create-account` (45) → `/reference/v4/studio/overview` - -## Paths That DON'T Need 
Redirects to /reference/v4/ - -- `/docs/deployments/install-harper/*` — installation content, no equivalent in new ref -- `/docs/deployments/harper-cloud/*` — redirect to `/reference/v4/legacy/cloud` (or keep existing) -- `/docs/deployments/upgrade-hdb-instance` — keep existing redirect or drop -- `/docs/administration/harper-studio/*` (most subpages) — redirect to `/reference/v4/studio/overview` -- `/docs/getting-started/*` — already redirects to `/learn/` -- `/docs/foundations/*` — already redirects to `/learn/` - -## Versioned Doc Paths (/docs/4.X/) in Analytics - -Low traffic but some exist. Recommend a general catch-all pattern: - -- `/docs/4.X/developers/...` → strip version prefix, apply same rules as `/docs/developers/...` -- `/docs/4.X/reference/...` → strip version prefix, apply same rules as `/docs/reference/...` -- Alternative: redirect `/docs/4.X/...` → `/docs/...` (simpler, single hop) - -## Special Notes for Non-Reference Sections - -### /learn/ — needs few/no new redirects - -- Already has redirects for `/getting-started/*` and `/foundations/*` -- `/learn/developers/coming-soon` and `/learn/administration/coming-soon` are real pages, no redirects needed - -### /release-notes/ — existing redirects are fine - -- The existing `createRedirects` logic for release-notes path variants (old naming) is worth keeping -- No new redirects needed unless we change the release-notes structure - -### /fabric/ — no redirects needed - -- Brand new section with no old paths to redirect from - -## Old redirects.ts Issues - -The existing file has: - -1. Many rules dragged from very old docs (HarperDB Studio → Harper Studio, HarperDB Cloud, custom-functions etc.) that are still valid but very old -2. `withBase()` abstraction that adds complexity — the basePath was used when docs were at `/docs/` but now everything is at root -3. Separate `generateRedirects()` and `createRedirects()` (wildcard) functions — the split is conceptually fine -4. 
Some rules still point to old paths like `/administration/harper-studio/`, `/deployments/install-harper/` etc. which still exist in the current site - -## Approach for New redirects.ts - -1. **Keep** existing rules that redirect very-old paths (pre-Harper) → current paths — these are still valid -2. **Add** new rules for old `/docs/developers/`, `/docs/reference/`, `/docs/administration/`, `/docs/deployments/` → `/reference/v4/` -3. **Use patterns** for versioned paths `/docs/4.X/...` — either: - - Pattern: catch-all redirect `/docs/4.X/` → drop version and apply same rules (cleaner) - - Or just let them 404 — traffic is low (<30 views per page) -4. **Remove** now-redundant `basePath` abstraction since redirect targets are absolute paths -5. **Simplify** `createRedirects` wildcard function to focus on the actual patterns needed - -## Decisions (Confirmed) - -- `/docs/` root (2854 views) → redirect to `/` (site root) -- `/docs/developers/applications/caching` → `/reference/v4/resources/overview` (add comment: eventually redirect to a dedicated learn page for database caching) -- `/docs/reference/globals` → `/reference/v4/components/javascript-environment` ✓ -- Versioned `/docs/4.X/*` paths → **catch-all to `/reference/v4/`** (not per-path mappings; traffic is low) -- No `basePath`/`withBase()` abstraction — all redirect targets are absolute paths, site is served at `/` -- **Clean break**: only keep rules for paths that appear in pageview data. Paths with <10 views are marked for review — we may 404 those. -- The redirect plugin is commented out in `docusaurus.config.ts` — uncomment it as part of this work. 
diff --git a/migration-context/link-placeholders/todo b/migration-context/link-placeholders/todo deleted file mode 100644 index e69de29b..00000000 diff --git a/v4-docs-implementation-plan.md b/v4-docs-implementation-plan.md deleted file mode 100644 index b52e038f..00000000 --- a/v4-docs-implementation-plan.md +++ /dev/null @@ -1,634 +0,0 @@ -# Harper v4 Documentation Migration Implementation Plan - -This document outlines the concrete steps for migrating Harper v4 documentation from `versioned_docs/version-4.X/` into a consolidated `reference_versioned_docs/version-v4/` structure as defined in [v4-docs-reference-plan.md](./v4-docs-reference-plan.md) and mapped in [v4-docs-migration-map.md](./v4-docs-migration-map.md). - -## Overview - -**Branch Strategy**: All work happens on `major-version-reorg` branch. Once complete, merge to `main` in one go. - -**Target Directory**: All new content goes into `reference_versioned_docs/version-v4/` (NOT `reference/`). After v4 migration is complete, we'll copy to `reference/` to kickstart v5 (out of scope for this plan). - -**Approach**: AI agents do initial content generation → humans review and edit → merge → repeat until complete → cleanup passes. - ---- - -## Part 1: Initial Content Generation (AI-Driven) - -### Overview - -AI agents work through the migration map, creating PRs for each top-level section. Each PR adds new files without removing anything from `versioned_docs/`. - -### Agent Instructions - -For each section in the migration map, the agent should: - -1. **Read the migration map entry** for the section -2. **Read all source files** listed in "Primary Source" and "Additional Sources" -3. **Read relevant release notes** from `release_notes/` for version annotations -4. **Generate new reference files** following the structure in the reference plan -5. **Add inline source comments** documenting what was used: - - ```markdown - - - - ``` - -6. 
**Use link placeholders** for cross-references that don't exist yet: - - ```markdown - [JWT Authentication](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md 'Will be created in security section') - ``` - - **IMPORTANT**: After generating all files in the section, replace TODO placeholders with relative paths for internal section links: - - For links within the same section: Use relative paths like `./filename.md` - - For links to other sections not yet migrated: Keep TODO placeholders - - Example: `[CLI Commands](./commands.md)` NOT `[CLI Commands](TODO:reference_versioned_docs/version-v4/cli/commands.md)` - -7. **Create section-specific link placeholder tracker**: - - Store in `migration-context/link-placeholders/` - - Named by section: `cli-link-placeholders.md`, `security-link-placeholders.md`, etc. - - Format: - - ```markdown - # Link Placeholders for [Section Name] - - ## reference_versioned_docs/version-v4/[section]/[file].md - - - Line 45: `[JWT Auth](TODO:reference_versioned_docs/version-v4/security/jwt-authentication.md)` - - Context: Discussing authentication methods - - Target should be: Main JWT authentication reference page - - - Line 123: `[Operations API](TODO:reference_versioned_docs/version-v4/operations-api/operations.md)` - - Context: Listing all available operations - - Target should be: Complete operations list - ``` - -8. **Add version annotations** using the strategy defined in reference plan: - - ```markdown - ## Relationships - - Added in: v4.3.0 - - The `@relation` directive... - ``` - - **Include confidence levels**: - - "Added in: v4.3.0 (confirmed via release notes)" - - "Added in: v4.3.0 (inferred from version comparison, needs verification)" - - "Changed in: v4.4.0 (likely, needs human verification)" - -9. **Note conflicts and uncertainties** in PR description - -10. **Handle images/assets** with placeholders: - - ```markdown - - - - ![Architecture Diagram](TODO:IMAGE) - ``` - -11. 
**Update the versioned sidebar** at `reference_versioned_sidebars/version-v4-sidebars.json`: - - Add a non-collapsible category for the section - - List all pages in the appropriate order - - Match the pattern from `sidebarsLearn.ts` (non-collapsible with `className: "learn-category-header"`) - - Example: - - ```json - { - "type": "category", - "label": "CLI", - "collapsible": false, - "className": "learn-category-header", - "items": [ - { - "type": "doc", - "id": "cli/overview", - "label": "Overview" - } - // ... - ] - } - ``` - -12. **Update migration-map.md** status to "In Progress" for that section - -13. **Git workflow with fixup commits**: - - Create feature branch: `git checkout -b migration/[section-name]` - - Make initial commit with all content files - - Use `git commit --fixup ` for subsequent changes - - This allows easy squashing later while keeping development history clear - - Example: - - ```bash - # Initial commit - git add reference_versioned_docs/version-v4/cli/*.md - git commit -m "docs: migrate CLI section to v4 consolidated reference" - - # Subsequent fixes use --fixup - git add reference_versioned_sidebars/version-v4-sidebars.json - git commit --fixup HEAD - ``` - - - PRs will be squash-merged to maintain clean history on main branch - -14. **Create PR** with comprehensive description (template below) - -### PR Description Template - -```markdown -# [Section Name] Migration - -## Summary - -Migration of [section name] documentation from versioned_docs into new reference structure. 
- -## Files Created - -- reference_versioned_docs/version-v4/[section]/overview.md -- reference_versioned_docs/version-v4/[section]/page1.md -- reference_versioned_docs/version-v4/[section]/page2.md - -## Source Files Used - -### reference_versioned_docs/version-v4/[section]/overview.md - -- `versioned_docs/version-4.7/path/to/file.md` (primary source) -- `versioned_docs/version-4.2/path/to/file.md` (for baseline features) -- `release_notes/4.3.0.md` (feature introduction dates) - -### reference_versioned_docs/version-v4/[section]/page1.md - -- `versioned_docs/version-4.7/path/to/another.md` (primary) -- ... - -## Version Annotations Added - -### High Confidence (Confirmed via release notes) - -- Feature X: Added in v4.3.0 -- Feature Y: Changed in v4.4.0 - -### Needs Verification - -- Feature Z: Likely added in v4.3.0 (inferred from version comparison) -- Config option ABC: Possibly changed in v4.5.0 (mentioned in docs but not in release notes) - -## Link Placeholders Created - -See `migration-context/link-placeholders/[section]-link-placeholders.md` for complete list. 
- -Summary: - -- 12 placeholders to operations-api section -- 5 placeholders to security section -- 3 placeholders to configuration section - -## Images/Assets Noted - -- Line 45 of overview.md: TODO-IMAGE for architecture diagram -- Line 123 of page1.md: TODO-IMAGE for flow chart - -## Conflicts & Questions for Human Review - -### Content Conflicts - -None (reference/ directory was reset) - -### Uncertainties - -- Unclear if Feature Z was introduced in v4.3.0 or v4.4.0 - marked for verification -- Configuration option `foo.bar` mentioned in v4.5 docs but not in earlier versions or release notes - -## Migration Map Status - -Updated status for this section to "In Progress" - -## Checklist for Human Reviewer - -- [ ] Verify version annotations marked as "needs verification" -- [ ] Review content accuracy and completeness -- [ ] Check inline source comments are accurate -- [ ] Decide on image/asset handling -- [ ] Ensure link placeholders make sense -- [ ] Update migration-map.md status to "Complete" after merge -``` - -### Sections to Migrate (In Order of Priority) - -Based on migration map and reference plan, recommend this order. Each section is generated as a complete unit with all its pages at once: - -**Phase 1A - Simple, Stable Sections** — **Complete** - -1. **CLI** (`reference_versioned_docs/version-v4/cli/`) — **Complete** - - `overview.md` - - `commands.md` - - `operations-api-commands.md` - - `authentication.md` - -2. **GraphQL Querying** (`reference_versioned_docs/version-v4/graphql-querying/`) — **Complete** - - `overview.md` - -3. **Studio** (`reference_versioned_docs/version-v4/studio/`) — **Complete** - - `overview.md` - Simple page covering local Studio UI configuration and access - -4. **Fastify Routes** (`reference_versioned_docs/version-v4/fastify-routes/`) — **Complete** - - `overview.md` - -**Phase 1B - Medium Complexity** — **Complete** - -1. 
**Environment Variables** (`reference_versioned_docs/version-v4/environment-variables/`) — **Complete** - - `overview.md` - - ~~`configuration.md`~~ _(not created — content to be ported into `configuration/overview.md`)_ - -2. **Static Files** (`reference_versioned_docs/version-v4/static-files/`) — **Complete** - - `overview.md` - - ~~`configuration.md`~~ _(not needed — all options documented inline in overview)_ - -3. **HTTP** (`reference_versioned_docs/version-v4/http/`) — **Complete** - - `overview.md` - - `configuration.md` - - `api.md` - - `tls.md` _(added during migration — TLS config warranted its own page)_ - -4. **MQTT** (`reference_versioned_docs/version-v4/mqtt/`) — **Complete** - - `overview.md` - - `configuration.md` - -5. **Logging** (`reference_versioned_docs/version-v4/logging/`) — **Complete** - - `overview.md` - - `configuration.md` - - `api.md` - - `operations.md` - -6. **Analytics** (`reference_versioned_docs/version-v4/analytics/`) — **Complete** - - `overview.md` - - `operations.md` - -**Phase 1C - Complex Sections** - -1. **Security** (`reference_versioned_docs/version-v4/security/`) — **Complete** - - `overview.md` - - `basic-authentication.md` - - `jwt-authentication.md` - - `mtls-authentication.md` - - `certificate-management.md` - - `certificate-verification.md` - - `configuration.md` _(consolidated from planned `cors.md` + `ssl.md`)_ - - `api.md` _(added during migration — not in original plan)_ - -2. **Users and Roles** (`reference_versioned_docs/version-v4/users-and-roles/`) — **Complete** - - `overview.md` - - `configuration.md` - - `operations.md` - - _Note: Broken out from Security section during migration; RBAC content warranted its own top-level section._ - -3. **REST** (`reference_versioned_docs/version-v4/rest/`) — **Complete** - - `overview.md` - - `querying.md` - - `headers.md` - - `content-types.md` - - `websockets.md` - - `server-sent-events.md` - -4. 
**Database** (`reference_versioned_docs/version-v4/database/`) — **Complete** - - `overview.md` - - `schema.md` - - `api.md` _(JS globals: `tables`, `databases`, `transaction()`, `createBlob()`)_ - - `data-loader.md` - - `storage-algorithm.md` - - `jobs.md` - - `system-tables.md` - - `compaction.md` - - `transaction.md` - -5. **Resources** (`reference_versioned_docs/version-v4/resources/`) — **Complete** - - `overview.md` - - `resource-api.md` - - ~~`global-apis.md`~~ _(not created — content covered by `components/javascript-environment.md`)_ - - `query-optimization.md` - -6. **Components** (`reference_versioned_docs/version-v4/components/`) — **Complete** - - `overview.md` - - `applications.md` - - `extension-api.md` - - `plugin-api.md` - - `javascript-environment.md` _(added during migration — JS environment details warranted its own page)_ - -7. **Replication** (`reference_versioned_docs/version-v4/replication/`) — **Complete** - - `overview.md` - - `clustering.md` - - `sharding.md` - -**Phase 1D - Cross-Cutting Sections** — **Complete** - -1. **Operations API** (`reference_versioned_docs/version-v4/operations-api/`) — **Complete** - - `overview.md` - - `operations.md` - -2. **Configuration** (`reference_versioned_docs/version-v4/configuration/`) — **Complete** - - `overview.md` - - `options.md` - - `operations.md` - -**Phase 1E - Legacy Content** — **Complete** - -1. **Legacy** (`reference_versioned_docs/version-v4/legacy/`) — **Complete** - - `cloud.md` - Harper Cloud landing page directing users to Fabric - - `custom-functions.md` - What Custom Functions were; directs to Components - - ~~`sql.md`~~ - Moved to `database/sql.md` (SQL is documented content, not just a deprecation notice) - ---- - -## Part 2: Human Review & Merge - -### For Each PR - -1. **Human reviews PR** using checklist in PR description -2. **Human edits content** as needed: - - Verify version annotations - - Improve writing/clarity - - Resolve uncertainties - - Handle image decisions -3. 
**Human approves and merges PR** -4. **Human updates migration-map.md** status to "Complete" - ---- - -## Part 3: Link Resolution (AI-Driven) - -Once all Part 1 PRs are merged, resolve link placeholders. - -### Agent Instructions - -1. **Read all `migration-context/link-placeholders/*.md` files** -2. **Scan all `reference_versioned_docs/version-v4/` files** to build index of what exists -3. **For each placeholder**: - - Determine if target file exists - - If exists: replace `TODO:path` with actual relative path - - If doesn't exist: flag for human review (might be typo in original plan) -4. **Create PR(s)** for link resolution: - - Option A: One PR per section - - Option B: One large PR for all links - - Recommend: One PR per section for easier review -5. **PR description** should list: - - How many links resolved - - How many links couldn't be resolved (and why) - -### Link Resolution PR Template - -```markdown -# Link Resolution: [Section Name] - -## Summary - -Resolved link placeholders in [section name] now that target pages exist. - -## Links Resolved - -- `reference_versioned_docs/version-v4/[section]/file1.md` line 45: JWT Auth → `../security/jwt-authentication.md` -- `reference_versioned_docs/version-v4/[section]/file1.md` line 67: Operations → `../operations-api/operations.md` -- ... (X total links resolved) - -## Links Unable to Resolve - -- `reference_versioned_docs/version-v4/[section]/file2.md` line 123: Target `TODO:reference_versioned_docs/version-v4/foo/bar.md` doesn't exist - - Recommendation: This might be a typo, should probably link to `../foo/baz.md` instead - -## Checklist - -- [ ] Human verify resolved links are correct -- [ ] Human resolve any unresolvable links -- [ ] Delete corresponding `migration-context/link-placeholders/[section]-link-placeholders.md` after merge -``` - ---- - -## Part 4: Cross-Reference Updates (AI-Assisted) - -Update other parts of documentation that reference the old structure. 
- -### 4.1: Release Notes - -**Task**: Update internal links in release notes to point to new structure. - -**Agent Instructions**: - -1. Scan all files in `release_notes/` -2. Find links to old paths (e.g., `/docs/4.7/...`, `/docs/developers/...`) -3. Map to new paths based on migration map -4. Create PR with updates - -### 4.2: Learn Guides - -**Task**: Update links in learn guides to point to new reference structure. - -**Agent Instructions**: - -1. Scan all files in `learn/` -2. Find links to old reference paths -3. Map to new paths -4. Create PR with updates - -### 4.3: Other Documentation - -**Task**: Find and update any other references to old paths. - -**Agent Instructions**: - -1. Search entire repo for common old path patterns -2. Update as appropriate -3. Create PR with updates - ---- - -## Part 6: Redirects Configuration (AI-Assisted) - -Configure redirects from old paths to new paths. - -### Agent Instructions - -1. **Analyze existing `redirects.ts`** (or wherever redirects are configured) -2. **Read sitemap** (if available) for list of old paths -3. **Use migration map** to determine new paths for old URLs -4. **Generate redirect rules**: - - Perfect redirects for mapped pages - - Catch-all redirects for unmapped pages (to appropriate section overview) -5. **Create PR** with redirect configuration - -### Redirect Priority - -Focus on: - -1. Most visited pages (if analytics data available) -2. All `/docs/4.7/` paths (current latest) -3. Common paths across v4.2-v4.6 (many are duplicates) -4. Catch-all for everything else - ---- - -## Part 7: Cleanup & Finalization - -### 7.1: Orphaned Content Review - -**Human Task**: - -1. Review "Files Being Removed" section in migration map -2. Confirm these files are intentionally not migrated -3. Document decision (move to legacy, move to learn, delete entirely) - -### 7.2: Remove Old Content - -**After all above steps complete**: - -1. Create PR that removes old `versioned_docs/version-4.X/` folders -2. 
Only do this after confirming: - - All content is migrated or intentionally deprecated - - All orphaned content is accounted for - - Redirects are working - - Sidebars are updated - -### 7.3: Final Validation - -**Human Task**: - -1. Build documentation locally -2. Spot check various pages -3. Test redirects -4. Verify no broken links -5. Check version annotations make sense - -### 7.4: Merge to Main - -Once everything on `major-version-reorg` branch is complete: - -1. Final review of entire branch -2. Squash/organize commits if needed -3. Format -4. Merge to `main` -5. Deploy - ---- - -## Agent Configuration Summary - -### Files Agents Should Reference - -**Primary**: - -- `v4-docs-migration-map.md` - The authoritative source for what goes where -- `v4-docs-reference-plan.md` - Understanding structure and philosophy -- `versioned_docs/version-4.X/**/*.md` - Source content -- `release_notes/*.md` - Version annotation validation -- `v4-docs-research.md` - Manual research notes - -### Agent Constraints - -**DO**: - -- Add new files to `reference_versioned_docs/version-v4/` -- Include inline source comments -- Use link placeholders with TODO: prefix -- Create section-specific link placeholder trackers -- Add version annotations with confidence levels -- Flag uncertainties for human review -- Update migration-map.md status - -**DO NOT**: - -- Remove anything from `versioned_docs/` (wait until Part 7) -- Add files to `reference/` (that's for v5 later) -- Guess at version annotations without noting confidence -- Skip inline source documentation -- Make assumptions about image handling without flagging - -### Link Placeholder Format - -**Standard format**: - -```markdown -[Link Text](TODO:reference_versioned_docs/version-v4/section/page.md 'Optional description of expected target') -``` - -**For images**: - -```markdown - - - -![Alt text](TODO:IMAGE) -``` - -### Version Annotation Format - -**High confidence**: - -```markdown -Added in: v4.3.0 -``` - -**Needs 
verification**: - -```markdown -Added in: v4.3.0 (inferred from version comparison, needs verification) -``` - -**Changed features**: - -```markdown -Changed in: v4.4.0 - -[Describe the change] -In previous versions: [Describe old behavior] -``` - -**Deprecated features**: - -```markdown -Deprecated in: v4.X.0 (moved to legacy in v4.7+) - -[Feature] is still supported but discouraged. See [alternative] for modern approach. -``` - ---- - -## Success Criteria - -- [ ] All sections from migration map have PRs created -- [ ] All PRs reviewed and merged by humans -- [ ] All link placeholders resolved -- [ ] Cross-references in release_notes and learn updated -- [ ] Sidebars configured -- [ ] Redirects configured -- [ ] Old versioned_docs removed -- [ ] Documentation builds without errors -- [ ] Spot checks confirm accuracy -- [ ] Branch merged to main - ---- - -## Estimated Timeline - -- **Part 1** (AI generation): Agents can work in parallel, ~1-2 days for all PRs -- **Part 2** (Human review): Depends on reviewer availability, estimate 1-2 weeks -- **Part 3** (Link resolution): ~1 day for agent work + ~2-3 days human review -- **Part 4** (Cross-references): ~1 day for agent work + ~1 day human review -- **Part 5** (Sidebars): ~1 day total -- **Part 6** (Redirects): ~1-2 days total -- **Part 7** (Cleanup): ~2-3 days total - -**Total estimated**: 3-4 weeks (heavily dependent on human review throughput) - ---- - -## Notes - -- Keep `versioned_docs/` intact throughout process as source of truth -- All work on `major-version-reorg` branch -- Human review is critical - AI does heavy lifting, humans ensure quality -- Link placeholders prevent getting blocked on interdependencies -- Section-specific placeholder files prevent merge conflicts -- Version annotations preserve historical context -- Inline source comments maintain traceability diff --git a/v4-docs-migration-map.md b/v4-docs-migration-map.md deleted file mode 100644 index 3f89e44d..00000000 --- 
a/v4-docs-migration-map.md +++ /dev/null @@ -1,961 +0,0 @@ -# Harper v4 Documentation Migration Map - -This document maps existing documentation paths from `versioned_docs/version-4.X/` and `reference/` to the new consolidated reference structure defined in [v4-docs-reference-plan.md](./v4-docs-reference-plan.md). - -## Legend - -- **Primary Source**: The version folder that should be used as the primary content source (usually v4.7) -- **Merge Required**: Content needs to be merged from multiple versions -- **Version Annotations**: Requires version history annotations based on earlier versions -- **Status**: Current migration status - - `Not Started` - No work done yet - - `In Progress` - Currently being migrated - - `Complete` - Migration finished - - `N/A` - Not applicable (content being removed/deprecated) - ---- - -## CLI Section - -### `reference/cli/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/deployments/harper-cli.md` -- **Additional Sources**: - - `versioned_docs/version-4.1/cli.md` (for baseline features) - - Current `reference/harper-cli.md` (if exists) -- **Merge Required**: Yes - CLI commands added across versions -- **Version Annotations**: Track command additions from v4.1 → v4.7 -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API commands - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Dev mode (`harperdb dev`, `harperdb run`) - -### `reference/cli/commands.md` - -- **Primary Source**: Extract from `versioned_docs/version-4.7/deployments/harper-cli.md` -- **Additional Sources**: Compare all versions for command evolution -- **Version Annotations**: Each command should note its introduction version -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion - - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Foreground mode changes - -### `reference/cli/operations-api-commands.md` - -- **Primary Source**: Extract from 
`versioned_docs/version-4.7/deployments/harper-cli.md` -- **Additional Sources**: `versioned_docs/version-4.3+` (CLI ops api support added in v4.3) -- **Version Annotations**: Note v4.3.0 introduction -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CLI expansion with operations API - -### `reference/cli/authentication.md` - -- **Primary Source**: New content or extract from CLI docs -- **Status**: Complete - ---- - -## Configuration Section - -### `reference/configuration/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` -- **Additional Sources**: - - Current `reference/configuration.md` - - `versioned_docs/version-4.1/configuration.md` (baseline) -- **Status**: Complete -- **Notes**: Must include a dedicated section on environment variable configuration. Content researched and ready from the environment-variables migration: - - **Naming convention**: YAML keys map to `SCREAMING_SNAKE_CASE` env vars (e.g. `http.port` → `HTTP_PORT`, `operationsApi.network.port` → `OPERATIONSAPI_NETWORK_PORT`). Case-insensitive. Component configuration cannot be set this way. - - **`HDB_CONFIG`**: CLI/ENV variable to specify a custom config file path at install time. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 51-55. - - **`HARPER_DEFAULT_CONFIG`**: Added in v4.7.2. Sets default config values as JSON, respects user edits, restores original on key removal. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 80-116 + `release_notes/4.7.2.md`. - - **`HARPER_SET_CONFIG`**: Added in v4.7.2. Forces config values that always win, even over user edits. Deleted (not restored) on key removal. Source: `versioned_docs/version-4.7/deployments/configuration.md` lines 118-145 + `release_notes/4.7.2.md`. - - **Configuration precedence**: `HARPER_SET_CONFIG` > user manual edits > `HARPER_DEFAULT_CONFIG` > file defaults. 
- - **State tracking**: Harper maintains `{rootPath}/backup/.harper-config-state.json` for drift detection and restoration. - - Full content is in `reference_versioned_docs/version-v4/environment-variables/configuration.md` — this file should be deleted after porting its content here. -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Major config changes (http section, componentRoot) - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Configuration improvements - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Developer/production mode - - [4.7.2](release-notes/v4-tucker/4.7.2.md) - HARPER_SET_CONFIG and HARPER_DEFAULT_CONFIG added - -### `reference/configuration/options.md` - -- **Primary Source**: Current `reference/configuration.md` (very comprehensive) -- **Additional Sources**: Compare all version-X/deployments/configuration.md files -- **Merge Required**: Yes - configuration options added across versions -- **Version Annotations**: Each config option needs version introduced -- **Status**: Complete -- **Notes**: This will be a large migration task - the current configuration.md is 59KB -- **Release Notes**: Major config changes across many versions - see all major releases - -### `reference/configuration/operations.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/configuration.md` -- **Additional Sources**: Earlier versions for feature evolution -- **Version Annotations**: Track when ops were added -- **Status**: Complete - ---- - -## Operations API Section - -### `reference/operations-api/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/index.md` -- **Additional Sources**: - - `versioned_docs/version-4.2/developers/operations-api/index.md` (first structured ops api section) -- **Status**: Complete - -### `reference/operations-api/operations.md` - -- **Primary Source**: Synthesize from all `versioned_docs/version-4.7/developers/operations-api/*.md` files -- **Merge 
Required**: Yes - comprehensive list linking to primary references -- **Version Annotations**: Each operation needs version introduced -- **Status**: Complete -- **Notes**: This should be a simplified reference table/list with links to detailed docs in feature sections - ---- - -## Security Section - -### `reference/security/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/index.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: Complete - -### `reference/security/basic-authentication.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/basic-auth.md` -- **Additional Sources**: `versioned_docs/version-4.1/security/basic-authentication.md` -- **Version Annotations**: Available since v4.1.0 -- **Status**: Complete - -### `reference/security/jwt-authentication.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/jwt-auth.md` -- **Additional Sources**: `versioned_docs/version-4.1/security/jwt.md` -- **Version Annotations**: Available since v4.1.0 -- **Status**: Complete - -### `reference/security/mtls-authentication.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/mtls-auth.md` -- **Additional Sources**: `versioned_docs/version-4.3/developers/security/mtls-auth.md` -- **Version Annotations**: Added in v4.3.0 -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS support added - -### `reference/security/certificate-management.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-management.md` -- **Additional Sources**: - - `versioned_docs/version-4.1/security/certificate-management.md` - - `versioned_docs/version-4.4+` (dynamic cert management added) -- **Merge Required**: Yes - dynamic certificate management added in v4.4 -- **Version Annotations**: Dynamic certs added v4.4.0 -- **Status**: Complete -- **Release 
Notes**: - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Dynamic certificate management - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Certificate revocation - -### `reference/security/certificate-verification.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/certificate-verification.md` -- **Version Annotations**: Added in v4.7.0 (OCSP support) -- **Status**: Complete -- **Release Notes**: - - [4.7.0](release-notes/v4-tucker/4.7.0.md) - OCSP support - -### `reference/security/configuration.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: Complete -- **Notes**: Covers authentication configuration (authorizeLocal, cacheTTL, enableSessions, token timeouts, hashFunction), CORS, and SSL/TLS settings. Originally planned as separate `cors.md` and `ssl.md` pages; consolidated into a single `configuration.md` during migration. - -### `reference/security/api.md` - -- **Status**: Complete -- **Notes**: Added during migration — not in the original plan. Security-related API reference. - ---- - -## Users and Roles Section - -Broken out from the security section during migration — RBAC warrants its own top-level section given the breadth of content (operations API, config file roles, permission structure). 
- -### `reference/users-and-roles/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/security/users-and-roles.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/roles.md` - - Current `reference/defining-roles.md` -- **Merge Required**: Yes - content spread across multiple files -- **Status**: Complete -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Cookie-based sessions - -### `reference/users-and-roles/configuration.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/roles.md` -- **Additional Sources**: `versioned_docs/version-4.7/developers/security/configuration.md` -- **Status**: Complete -- **Notes**: Config file roles (roles.yaml), password hashing - -### `reference/users-and-roles/operations.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/users-and-roles.md` -- **Status**: Complete -- **Notes**: Operations API — all role and user operations -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Password hashing upgrade (sha256, argon2id) - ---- - -## Components Section - -### `reference/components/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/components/index.md` -- **Additional Sources**: - - `versioned_docs/version-4.1/custom-functions/*` (for evolution context) - - `versioned_docs/version-4.2/developers/applications/index.md` - - Current `reference/components/index.md` -- **Merge Required**: Yes - tell the evolution story (custom functions → components → applications/extensions → plugins) -- **Version Annotations**: - - Custom Functions: v4.1.0 - - Components concept: v4.2.0 - - Applications/Extensions: v4.3.0+ - - Plugin API: v4.6.0 -- **Status**: Complete -- **Notes**: This is a critical page that explains the evolution -- **Release Notes**: - - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Custom 
functions with worker threads - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture introduced - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API with dynamic reloading - -### `reference/components/applications.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/components/applications.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/applications/*.md` - - Current `reference/components/applications.md` -- **Merge Required**: Yes - application developer docs scattered across multiple files -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Component architecture, NPM/GitHub deployment - -### `reference/components/extension-api.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/components/extensions.md` -- **Additional Sources**: Current `reference/components/extensions.md` -- **Version Annotations**: Extension API formalized around v4.4-4.5 -- **Status**: Complete -- **Release Notes**: - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - New extension API - -### `reference/components/plugin-api.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/components/plugins.md` -- **Additional Sources**: Current `reference/components/plugins.md` -- **Version Annotations**: Added in v4.6.0 -- **Status**: Complete -- **Release Notes**: - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Plugin API introduced - - [4.7.0](release-notes/v4-tucker/4.7.0.md) - Further plugin API improvements - -### `reference/components/javascript-environment.md` - -- **Status**: Complete -- **Notes**: Added during migration — JavaScript environment details for component development warranted its own page. Not in the original plan. 
- ---- - -## Database Section - -### `reference/database/overview.md` - -- **Primary Source**: New content synthesizing how database system works -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/architecture.md` - - Current `reference/architecture.md` -- **Status**: Complete -- **Notes**: Should explain Resources + Schema + Auto-REST relationship -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Database structure changes (single file per database) - -### `reference/database/schema.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/applications/defining-schemas.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/data-types.md` - - `versioned_docs/version-4.7/reference/dynamic-schema.md` - - Current `reference/defining-schemas.md` - - Current `reference/data-types.md` - - Current `reference/dynamic-schema.md` - - `versioned_docs/version-4.7/reference/blob.md` - - Current `reference/blob.md` - - Vector docs (if exists) -- **Merge Required**: Yes - comprehensive schema system documentation -- **Version Annotations**: - - Basic schemas: v4.2.0 - - Relations (@relation): v4.3.0 - - Computed properties: v4.4.0 - - Blob storage: v4.5.0 - - Vector indexing: v4.6.0 -- **Status**: Complete -- **Notes**: Large consolidation - may want to keep blobs/vectors separate -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Configurable schemas with GraphQL syntax - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Relationships and joins, indexing nulls, BigInt support - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Computed properties, custom indexing, auto-incrementing primary keys - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Blob storage - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Vector indexing (HNSW) - -### `reference/database/data-loader.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/applications/data-loader.md` -- **Additional Sources**: Current 
`reference/data-loader.md` -- **Version Annotations**: Added in v4.6.0 -- **Status**: Complete -- **Release Notes**: - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Data loader introduced - -### `reference/database/storage-algorithm.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/storage-algorithm.md` -- **Additional Sources**: Current `reference/storage-algorithm.md` -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Storage performance improvements, compression by default - -### `reference/database/jobs.md` - -- **Primary Source**: `versioned_docs/version-4.7/administration/jobs.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/operations-api/jobs.md` - - `versioned_docs/version-4.7/developers/operations-api/bulk-operations.md` -- **Merge Required**: Yes - jobs/bulk operations content scattered -- **Status**: Complete - -### `reference/database/system-tables.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` -- **Additional Sources**: Current `reference/analytics.md` -- **Status**: Complete -- **Notes**: System tables for analytics and other features - -### `reference/database/compaction.md` - -- **Primary Source**: `versioned_docs/version-4.7/administration/compact.md` -- **Additional Sources**: Current `reference/compact.md` -- **Version Annotations**: Added in v4.3.0 -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Compact database functionality - -### `reference/database/api.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/globals.md` (tables, databases globals) -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/transactions.md` (transaction() function) - - `versioned_docs/version-4.7/reference/blob.md` (createBlob() function) -- **Merge Required**: Yes — combines tables/databases globals, transaction(), and createBlob() into one page -- **Version Annotations**: 
Blob type added in v4.5.0 -- **Status**: Complete -- **Notes**: Covers the JS globals most relevant to database interaction. Server globals (server.http, server.ws, etc.) belong in the HTTP/Resources sections. -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Blob storage and createBlob() added - -### `reference/database/transaction.md` - -- **Primary Source**: `versioned_docs/version-4.7/administration/logging/transaction-logging.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/administration/logging/audit-logging.md` - - `versioned_docs/version-4.1/transaction-logging.md` - - `versioned_docs/version-4.1/audit-logging.md` -- **Merge Required**: Yes - combines audit and transaction logging -- **Version Annotations**: Transaction logging available since v4.1.0, audit logging since v4.1.0 -- **Status**: Complete -- **Notes**: Consolidated from separate audit and transaction logging pages -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Balanced audit log cleanup - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Transaction reuse, storage reclamation (audit log eviction) - -### `reference/database/sql.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/sql-guide/` (all files) -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/operations-api/sql-operations.md` -- **Merge Required**: Yes — consolidates all sql-guide sub-pages into one -- **Status**: Complete -- **Notes**: Moved here from Legacy section. Includes a prominent warning that SQL is not recommended for production use or large tables. Covers DML syntax, features matrix, all function categories (aggregate, string, math, logical, date/time), SEARCH_JSON, geospatial functions, and reserved words. 
- ---- - -## Resources Section - -### `reference/resources/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` -- **Additional Sources**: Current `reference/resources/` folder -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced - -### `reference/resources/resource-api.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/resources/index.md` -- **Additional Sources**: - - `versioned_docs/version-4.7/reference/resources/instance-binding.md` - - `versioned_docs/version-4.7/reference/resources/migration.md` - - Current `reference/resources/index.md` - - Current `reference/resources/instance-binding.md` -- **Merge Required**: Yes - Resource API has two forms (with/without loadAsInstance) -- **Version Annotations**: - - Basic Resource API: v4.2.0 - - loadAsInstance changes: v4.4.0+ - - Response objects: v4.4.0 -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Resource API introduced - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - CRDT support - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Response object support - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Property forwarding - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Resource API upgrades - -### ~~`reference/resources/global-apis.md`~~ - -- **Status**: N/A -- **Notes**: Content superseded by `reference/components/javascript-environment.md`, which covers all global APIs (`tables`, `databases`, `transaction`, `createBlob`, `Resource`, `server`, `contentTypes`, `logger`) and references out to the appropriate sections for full detail. The two broken links in `resources/resource-api.md:547` and `rest/content-types.md:64` need to be updated to point to `../components/javascript-environment.md`. 
- -### `reference/resources/query-optimization.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/resources/query-optimization.md` -- **Additional Sources**: Current `reference/resources/query-optimization.md` -- **Status**: Complete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Query optimizations - ---- - -## Environment Variables Section - -### `reference/environment-variables/overview.md` - -- **Primary Source**: New content about `loadEnv` plugin -- **Additional Sources**: Built-in extensions docs, configuration docs -- **Version Annotations**: loadEnv added in v4.5.0 -- **Status**: Complete -- **Notes**: Covers `loadEnv` extension only. Harper-level environment variable configuration (naming conventions, `HDB_CONFIG`, `HARPER_DEFAULT_CONFIG`, `HARPER_SET_CONFIG`) belongs in the Configuration section — see notes there. The originally planned `configuration.md` sub-page was not created; that content is to be ported into `configuration/overview.md` (see Configuration section notes). -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Built-in loadEnv component - ---- - -## Static Files Section - -### `reference/static-files/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/components/built-in-extensions.md` (static section) -- **Additional Sources**: - - `versioned_docs/version-4.6/reference/components/built-in-extensions.md` (pre-v4.7 behavior) - - `versioned_docs/version-4.5/developers/components/built-in.md` (early v4 behavior) -- **Status**: Complete -- **Notes**: No separate `configuration.md` needed — all static plugin options are documented inline in the overview. The `configuration.md` entry has been removed; static file serving has no Harper-level configuration. The v4.7 Plugin API (`extensions`, `fallthrough`, `index`, `notFound` options and auto-update behavior) is annotated as added in v4.7.0 (inferred from version comparison; not present in v4.6 docs). 
The `static` plugin itself predates v4.7 (present in v4.4 and earlier). -- **Release Notes**: - - [4.7.2](release-notes/v4-tucker/4.7.2.md) - `static` handler defaults to `index.html` - - [4.7.3](release-notes/v4-tucker/4.7.3.md) - Fix trailing slash issue with static component - ---- - -## HTTP Section - -### `reference/http/overview.md` - -- **Primary Source**: New content about HTTP server -- **Additional Sources**: Configuration docs, architecture docs -- **Status**: Complete -- **Release Notes**: - - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Worker threads for HTTP requests - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Socket management (SO_REUSEPORT), flexible port configs - -### `reference/http/configuration.md` - -- **Primary Source**: Extract from `reference/configuration.md` (http section) -- **Version Annotations**: - - HTTP/2 support: v4.5.0 -- **Status**: Complete -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - HTTP/2 support - -### `reference/http/api.md` - -- **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (server global) -- **Additional Sources**: Current `reference/globals.md` -- **Version Annotations**: - - server.authenticateUser: v4.5.0 -- **Status**: Complete -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - server.authenticateUser API - -### `reference/http/tls.md` - -- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/security/` (TLS/certificate configuration) -- **Additional Sources**: `versioned_docs/version-4.7/deployments/configuration.md` (tls config section) -- **Status**: Complete -- **Notes**: Created during migration as a dedicated TLS configuration reference for the HTTP server. Originally not in the plan (TLS was expected to be in security section); added as a separate HTTP sub-page given the close relationship to HTTP configuration. 
- ---- - -## REST Section - -### `reference/rest/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/rest.md` -- **Additional Sources**: Current `reference/rest.md` -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - REST interface introduced - -### `reference/rest/querying.md` - -- **Primary Source**: Extract from REST docs and NoSQL operations -- **Additional Sources**: - - `versioned_docs/version-4.7/developers/operations-api/nosql-operations.md` -- **Version Annotations**: - - Null indexing/querying: v4.3.0 - - URL path improvements: v4.5.0 -- **Status**: Complete -- **Release Notes**: - - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Iterator-based queries - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Relationships/joins, sorting, nested select, null indexing - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved URL path parsing, directURLMapping - -### `reference/rest/headers.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/headers.md` -- **Additional Sources**: Current `reference/headers.md` -- **Status**: Complete - -### `reference/rest/content-types.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/content-types.md` -- **Additional Sources**: Current `reference/content-types.md` -- **Status**: Complete - -### `reference/rest/websockets.md` - -- **Primary Source**: Extract from `versioned_docs/version-4.7/developers/real-time.md` -- **Additional Sources**: Current `reference/real-time.md` -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - WebSocket support - -### `reference/rest/server-sent-events.md` - -- **Primary Source**: Extract from real-time or REST docs -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - Server-Sent Events support - ---- - -## MQTT Section - -### `reference/mqtt/overview.md` - -- **Primary Source**: Extract from 
`versioned_docs/version-4.7/developers/real-time.md` -- **Additional Sources**: Built-in plugins/extensions docs -- **Version Annotations**: - - MQTT features: v4.2.0+ - - mTLS support: v4.3.0 - - Single-level wildcards: v4.3.0 - - CRDT: v4.3.0 -- **Status**: Complete -- **Release Notes**: - - [4.2.0](release-notes/v4-tucker/4.2.0.md) - MQTT support introduced (QoS 0 and 1, durable sessions) - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - mTLS, single-level wildcards, retain handling, CRDT - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Improved message delivery, blob support for MQTT - -### `reference/mqtt/configuration.md` - -- **Primary Source**: Extract from configuration docs and real-time docs -- **Version Annotations**: Port change v4.5.0 (9925 → 9933) -- **Status**: Complete -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Default replication port change - ---- - -## Logging Section - -### `reference/logging/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/administration/logging/index.md` -- **Additional Sources**: Current `reference/logging.md` (if exists) -- **Status**: Complete -- **Release Notes**: - - [4.1.0](release-notes/v4-tucker/4.1.0.md) - Logging revamped, consolidated into hdb.log - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Major logging improvements - -### `reference/logging/configuration.md` - -- **Primary Source**: Extract from configuration docs -- **Version Annotations**: - - Per-component logging: v4.6.0 - - Granular configuration: v4.6.0 -- **Status**: Complete -- **Release Notes**: - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Per-component logging, dynamic reloading, HTTP logging - -### `reference/logging/api.md` - -- **Primary Source**: Extract from `versioned_docs/version-4.7/reference/globals.md` (logger global) -- **Status**: Complete -- **Release Notes**: - - [4.6.0](release-notes/v4-tucker/4.6.0.md) - Logger based on Node.js Console API - -### `reference/logging/operations.md` - -- 
**Primary Source**: `versioned_docs/version-4.7/developers/operations-api/logs.md` -- **Status**: Complete -- **Notes**: Operations for managing standard logs (not transaction/audit logs, which moved to database section) - ---- - -## Analytics Section - -### `reference/analytics/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/analytics.md` -- **Additional Sources**: Current `reference/analytics.md` -- **Version Annotations**: - - Resource analytics: v4.5.0 - - Storage analytics: v4.5.0 -- **Status**: Complete -- **Release Notes**: - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Resource and storage analytics - - [4.7.0](release-notes/v4-tucker/4.7.0.md) - New analytics and licensing functionality - -### `reference/analytics/operations.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/operations-api/analytics.md` -- **Status**: Complete - ---- - -## Replication Section - -### `reference/replication/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/replication/index.md` -- **Additional Sources**: Current `reference/replication/` (if exists) -- **Version Annotations**: - - Native Replication (Plexus): v4.4.0 -- **Status**: Complete -- **Release Notes**: - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native replication system (Plexus), replicated operations - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information, improved replication timestamps - -### `reference/replication/clustering.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/clustering/index.md` -- **Additional Sources**: - - All `versioned_docs/version-4.7/reference/clustering/*.md` files - - `versioned_docs/version-4.7/developers/operations-api/clustering.md` - - Current `reference/clustering/` folder -- **Merge Required**: Yes - extensive clustering documentation needs consolidation -- **Status**: Complete -- **Notes**: Large section with many sub-pages -- **Release Notes**: - - 
[4.2.0](release-notes/v4-tucker/4.2.0.md) - Clone node functionality - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native replication with PKI/mTLS - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Cluster status information - -### `reference/replication/sharding.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/replication/sharding.md` -- **Version Annotations**: - - Sharding: v4.4.0 - - Expanded functionality: v4.5.0 -- **Status**: Complete -- **Release Notes**: - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Sharding introduced - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - Expanded sharding functionality - ---- - -## GraphQL Querying Section - -### `reference/graphql-querying/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/reference/graphql.md` -- **Additional Sources**: Current `reference/graphql.md` -- **Version Annotations**: - - Added: v4.4.0 (experimental) - - Disabled by default: v4.5.0 -- **Status**: Complete -- **Notes**: Mark as experimental/incomplete -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - OpenAPI specification endpoint - - [4.4.0](release-notes/v4-tucker/4.4.0.md) - Native GraphQL support (provisional) - - [4.5.0](release-notes/v4-tucker/4.5.0.md) - GraphQL configuration, disabled by default - ---- - -## Studio Section - -### `reference/studio/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/deployments/configuration.md` (localStudio configuration) -- **Status**: Complete -- **Notes**: Simple overview page focusing on: - - How to configure/enable local Studio (localStudio.enabled in config) - - How to access local Studio (http://localhost:9926) - - General description of Studio's purpose (UI for Harper instance) - - Link to hosted Studio at studio.harperdb.io -- **Release Notes**: - - [4.3.0](release-notes/v4-tucker/4.3.0.md) - Local studio upgraded to match online version - - [4.7.0](release-notes/v4-tucker/4.7.0.md) - Studio client updated - ---- - -## Fastify 
Routes Section - -### `reference/fastify-routes/overview.md` - -- **Primary Source**: `versioned_docs/version-4.7/developers/applications/define-routes.md` -- **Additional Sources**: Current `reference/define-routes.md` -- **Status**: Complete -- **Notes**: Discouraged in favor of modern routing with components, but still a supported feature. - ---- - -## Legacy Section - -### `reference/legacy/cloud/` - -- **Primary Source**: `versioned_docs/version-4.7/administration/harper-studio/*` -- **Additional Sources**: `versioned_docs/version-4.7/deployments/harper-cloud/*` -- **Status**: Complete -- **Notes**: The primary and additional sources are to be completely removed and this section is to act as a basic landing page to direct users to Fabric instead. - -### `reference/legacy/custom-functions.md` - -- **Primary Source**: `versioned_docs/version-4.1/custom-functions/index.md` -- **Additional Sources**: `versioned_docs/version-4.7/developers/operations-api/custom-functions.md` -- **Status**: Complete -- **Notes**: Single page (not a folder) — describes what Custom Functions were and directs users to the Components section as the modern alternative. - -### ~~`reference/legacy/sql.md`~~ - -- **Status**: N/A -- **Notes**: Moved to `reference/database/sql.md` — SQL is documented content (not just a deprecation notice), so it belongs in the Database section. See entry there. - ---- - -## Files Requiring Special Attention - -### High Priority Merges - -These files require careful merging from multiple sources: - -1. **Configuration Options** (`reference/configuration/options.md`) - - Current `reference/configuration.md` is comprehensive (59KB) - - Need to track every config option's version introduction - - Consider automated script to compare config files across versions - -2. 
**Schema System** (`reference/database/schema.md`) - - Merges: data-types, dynamic-schema, defining-schemas, blobs, vectors - - Significant evolution across v4.2 → v4.6 - - May want to split into multiple pages - -3. **Components Evolution** (`reference/components/overview.md`) - - Must tell the full story: custom functions → components → apps → plugins - - Critical for user understanding - -4. **Clustering** (`reference/replication/clustering.md`) - - 10+ files in current clustering/ folder - - Extensive operations APIs - - Significant changes between NATS and native replication - -5. **Resource API** (`reference/resources/resource-api.md`) - - Two flavors (instance-binding vs not) - - Migration path complex - - Significant API changes in v4.4 - -### Files Being Removed/Ignored - -These exist in current docs but won't exist in new structure: - -**To be moved to Learn guides:** - -- `versioned_docs/version-4.7/administration/administration.md` - Generic admin intro -- `versioned_docs/version-4.7/administration/cloning.md` - Move to Learn guide -- `versioned_docs/version-4.7/developers/applications/debugging.md` - Move to Learn guide -- `versioned_docs/version-4.7/developers/applications/caching.md` - Move to Learn guide -- `versioned_docs/version-4.7/developers/applications/web-applications.md` - Move to Learn guide -- `versioned_docs/version-4.7/developers/operations-api/quickstart-examples.md` - Move to Learn guide -- `versioned_docs/version-4.7/developers/operations-api/advanced-json-sql-examples.md` - Move to Learn guide -- `versioned_docs/version-4.7/deployments/install-harper/*` - Move to Learn guides -- `versioned_docs/version-4.7/deployments/upgrade-hdb-instance.md` - Move to Learn guide - -**To be ignored (obsolete content):** - -- `versioned_docs/version-4.7/reference/index.md` - Generic intro page -- `versioned_docs/version-4.7/reference/limits.md` - Fold into database/overview or schema -- `versioned_docs/version-4.7/administration/harper-studio/` - 
direct users to fabric -- `versioned_docs/version-4.7/deployments/harper-cloud/` - direct users to fabric - -### Cross-References to Update - -Files that heavily reference paths that will change: - -- All operations-api/\*.md files reference other sections -- Security files cross-reference operations and configuration -- Components files reference configuration and operations -- Clustering files extensively cross-reference - ---- - -## Version Annotation Checklist - -For each file migrated, ensure: - -- [ ] Features note their introduction version -- [ ] Changed behaviors note the version they changed -- [ ] Deprecated features note deprecation version -- [ ] Configuration options include "Added in:" notes -- [ ] Operations APIs include "Added in:" notes -- [ ] Links to related version-specific content - -## Release Notes Reference Guide - -The `release-notes/v4-tucker/` directory contains 169 release note files covering the entire v4.0 - v4.7 series. Key minor releases with significant feature additions: - -### Minor Releases - -- **[4.1.0](release-notes/v4-tucker/4.1.0.md)** (Worker threads, iterator-based queries, logging revamp) -- **[4.2.0](release-notes/v4-tucker/4.2.0.md)** (Resource API, Component Architecture, REST interface, MQTT/WebSockets/SSE, configurable schemas) -- **[4.3.0](release-notes/v4-tucker/4.3.0.md)** (Relationships/joins, null indexing, CLI expansion, mTLS, BigInt, compaction) -- **[4.4.0](release-notes/v4-tucker/4.4.0.md)** (Native replication/Plexus, sharding, computed properties, custom indexing, GraphQL, dynamic certificates) -- **[4.5.0](release-notes/v4-tucker/4.5.0.md)** (Blob storage, password hashing upgrade, HTTP/2, expanded sharding, loadEnv component) -- **[4.6.0](release-notes/v4-tucker/4.6.0.md)** (Vector indexing/HNSW, new extension API, logging improvements, data loader) -- **[4.7.0](release-notes/v4-tucker/4.7.0.md)** (Component status monitoring, OCSP, new analytics/licensing) - -### Feature-to-Release Note Mapping - -When 
adding version annotations, refer to these key features and their introduction versions: - -**CLI & Configuration** - -- CLI expansion with operations API: 4.3.0 -- Dev mode (`harperdb dev`): 4.2.0 -- Configuration improvements: 4.3.0, 4.4.0 - -**Security** - -- mTLS support: 4.3.0 -- Dynamic certificate management: 4.4.0 -- OCSP support: 4.7.0 -- Password hashing (sha256, argon2id): 4.5.0 -- Certificate revocation: 4.5.0 - -**Components & Extensions** - -- Component architecture: 4.2.0 -- Custom functions with worker threads: 4.1.0 -- New extension API: 4.6.0 -- Plugin API: 4.6.0, 4.7.0 -- Built-in loadEnv component: 4.5.0 - -**Database & Schema** - -- Configurable schemas (GraphQL syntax): 4.2.0 -- Relationships and joins: 4.3.0 -- Computed properties: 4.4.0 -- Custom indexing: 4.4.0 -- Blob storage: 4.5.0 -- Vector indexing (HNSW): 4.6.0 -- BigInt support: 4.3.0 -- Null indexing: 4.3.0 -- Auto-incrementing primary keys: 4.4.0 - -**Data Access** - -- Resource API: 4.2.0 -- CRDT support: 4.3.0 -- Response object support: 4.4.0 -- Property forwarding: 4.5.0 -- Data loader: 4.6.0 -- Iterator-based queries: 4.1.0 - -**REST & HTTP** - -- REST interface: 4.2.0 -- HTTP/2 support: 4.5.0 -- Improved URL path parsing: 4.5.0 -- server.authenticateUser API: 4.5.0 -- Worker threads for HTTP: 4.1.0 - -**Real-Time & MQTT** - -- MQTT support: 4.2.0 -- WebSocket support: 4.2.0 -- Server-Sent Events: 4.2.0 -- MQTT mTLS: 4.3.0 -- MQTT single-level wildcards: 4.3.0 -- MQTT retain handling: 4.3.0 -- Improved message delivery: 4.5.0 - -**Replication & Clustering** - -- Native replication (Plexus): 4.4.0 -- Sharding: 4.4.0 -- Expanded sharding functionality: 4.5.0 -- Clone node: 4.2.0 -- Replicated operations: 4.4.0 - -**Logging** - -- Logging consolidated to hdb.log: 4.1.0 -- Per-component logging: 4.6.0 -- Dynamic logging reload: 4.6.0 -- HTTP logging: 4.6.0 - -**GraphQL** - -- OpenAPI specification: 4.3.0 -- Native GraphQL support (provisional): 4.4.0 -- GraphQL disabled by default: 
4.5.0 - -**Storage & Performance** - -- Database structure (single file): 4.2.0 -- Storage performance improvements: 4.3.0 -- Compression by default: 4.3.0 -- Compact database: 4.3.0 -- Storage reclamation: 4.5.0 - -**Analytics** - -- Resource and storage analytics: 4.5.0 -- New analytics/licensing: 4.7.0 - -**Studio** - -- Local studio upgrade: 4.3.0 - -## Notes - -- Many current `reference/` files appear to already be partially reorganized -- The `versioned_docs/` folders contain the historical record -- Compare git history to validate when features were actually introduced -- Use release notes to cross-reference feature versions (see Release Notes Reference Guide above) -- Consider scripting the version annotation process for configuration options -- The release notes directory contains 169 files - use the Feature-to-Release Note Mapping above as a quick reference diff --git a/v4-docs-project-brief.md b/v4-docs-project-brief.md deleted file mode 100644 index 1ff84317..00000000 --- a/v4-docs-project-brief.md +++ /dev/null @@ -1,419 +0,0 @@ -# Harper v4 Documentation Migration - Project Brief - -**Last Updated**: 2026-02-18 -**Status**: Planning Complete - Ready to Execute -**Phase**: Pre-Pilot -**Branch**: `major-version-reorg` - ---- - -## Executive Summary - -We are consolidating Harper v4 documentation from seven versioned folders (v4.1 through v4.7) into a single, unified v4 reference using inline version annotations (Node.js style). Simultaneously, we're reorganizing from role-based categories ("Developers," "Administration") to a flat, feature-based structure (CLI, Database, REST, Components, etc.). - -This consolidation will improve documentation maintainability, make features more discoverable, and establish a strong foundation for v5 documentation. The migration involves 20 top-level sections, ~100+ individual pages, and will be executed using AI agents (Claude Code) for initial content generation with human review and refinement. 
- -**Target**: `reference_versioned_docs/version-v4/` (NOT `reference/` - that's for v5 later) - ---- - -## Quick Links - -- **[Reference Plan](./v4-docs-reference-plan.md)** - Target structure and philosophy (the "what" and "why") -- **[Migration Map](./v4-docs-migration-map.md)** - Detailed file-by-file mapping (the "where") -- **[Implementation Plan](./v4-docs-implementation-plan.md)** - Technical specifications for agents (the "how") -- **[Execution Procedure](./v4-docs-execution-procedure.md)** - Team workflow and process (the "who/when") -- **[Research Notes](./v4-docs-research.md)** - Manual research on feature evolution -- **[AI Feature History](./v4-feature-history-ai-gen.md)** - AI-generated feature timeline (use with caution) - ---- - -## Current Status - -### Phase Status - -- ✅ Planning & Documentation Complete -- ⏸️ Team Review Pending -- ⏳ Pilot Execution Not Started -- ⏳ Scale Execution Not Started -- ⏳ Cleanup Not Started - -### Sections Status (0/20 Complete) - -**Phase 1A - Simple** (0/5) - -- [ ] CLI -- [ ] Content Types -- [ ] Headers -- [ ] GraphQL Querying -- [ ] Studio - -**Phase 1B - Medium** (0/7) - -- [ ] Security -- [ ] Environment Variables -- [ ] Static Files -- [ ] HTTP -- [ ] MQTT -- [ ] Logging -- [ ] Analytics - -**Phase 1C - Complex** (0/5) - -- [ ] REST -- [ ] Replication -- [ ] Database -- [ ] Resources -- [ ] Components - -**Phase 1D - Cross-Cutting** (0/2) - -- [ ] Operations API -- [ ] Configuration - -**Phase 1E - Legacy** (0/1) - -- [ ] Legacy Content - -### Metrics - -- **PRs Opened**: 0/20 -- **PRs Merged**: 0/20 -- **Link Placeholders Created**: 0 -- **Link Placeholders Resolved**: 0 -- **Days Elapsed**: 0 -- **Estimated Days Remaining**: 21-28 - ---- - -## Key Decisions Log - -### 2026-02-18: Initial Planning - -- **Decision**: Use VSCode + Claude Code approach (vs fully automated Agent SDK) -- **Rationale**: Provides visibility and control; can pivot to automation if needed -- **Impact**: Requires manual orchestration 
but allows quality validation throughout - -### 2026-02-18: Target Directory - -- **Decision**: Output to `reference_versioned_docs/version-v4/` not `reference/` -- **Rationale**: Clean separation; `reference/` will be used for v5 kickstart later -- **Impact**: Additional step required later to copy to `reference/` for v5 - -### 2026-02-18: Transaction Logging Reorganization - -- **Decision**: Move transaction/audit logging from `logging/` to `database/` -- **Rationale**: Transaction logging is a database-level concern, not application logging -- **Impact**: Better conceptual organization; `logging/` focuses on app/system logs - -### 2026-02-18: Link Placeholder Strategy - -- **Decision**: Use `TODO:path` format in actual markdown links with per-section tracker files -- **Rationale**: Easy to find/replace, works with markdown parsers, no merge conflicts -- **Impact**: Separate cleanup phase needed to resolve placeholders - -### 2026-02-18: Complete Sections in Single PRs - -- **Decision**: Don't split large sections (like Configuration) into multiple PRs -- **Rationale**: Easier to review section holistically; context is preserved -- **Impact**: Some PRs will be large but provide complete picture - -### 2026-02-18: Pilot-First Approach - -- **Decision**: Run CLI and Security as pilots before scaling -- **Rationale**: Validate quality and process before committing to full migration -- **Impact**: Adds ~2-3 days upfront but reduces risk of rework - -### 2026-02-19: Temporary Build Simplifications - -- **Decision**: Temporarily disable local search plugin and set `onBrokenLinks: 'warn'` -- **Rationale**: Allows build to succeed during migration while reference docs are being populated -- **Impact**: Must remember to re-enable before merging to main: - - Re-enable local search plugin in `docusaurus.config.ts` themes section - - Change `onBrokenLinks` back to `'throw'` -- **Note**: prebuild.js and postbuild.js scripts are still needed and should remain: - - prebuild.js 
generates release-notes-data.json used by React components - - postbuild.js creates index.html files for URL flexibility (/path and /path/) - - Remove or update prebuild/postbuild scripts if no longer needed - ---- - -## Known Issues & Blockers - -### Current Blockers - -_None - ready to begin execution_ - -### Potential Risks - -1. **Version annotation accuracy** - AI might infer wrong introduction versions - - _Mitigation_: Confidence levels + human verification + release notes validation - -2. **Content quality variability** - Some sections might need significant editing - - _Mitigation_: Pilot sections first; refine prompts based on learnings - -3. **Review capacity** - Team might be overwhelmed by 20 large PRs - - _Mitigation_: Flexible timeline; can slow down review velocity as needed - -4. **Link placeholder confusion** - Placeholders might be unclear or incorrect - - _Mitigation_: Clear format specification; dedicated cleanup phase - -### Watch Items - -- [ ] Current `reference/` and `reference_versioned_docs/version-v4/` directories are empty (confirmed reset) -- [ ] All planning documents are up to date -- [ ] Team has capacity for 2-3 PR reviews per day -- [ ] GitHub tracking issue needs to be created before execution - ---- - -## Upcoming Milestones - -### Next Steps (Immediate) - -1. **Present to team** - Review all planning docs, get feedback and buy-in -2. **Environment setup** - Verify VSCode, Claude Code, gh CLI ready -3. **Create tracking issue** - Set up GitHub issue for progress tracking -4. **Run Pilot 1 (CLI)** - Execute first section, evaluate quality -5. 
**Team sync** - Review pilot results, refine approach - -### Near-Term Milestones (Next 2 Weeks) - -- [ ] Pilot sections complete (CLI + Security) -- [ ] Decision on scaling approach (continue VSCode or build automation) -- [ ] Phase 1A complete (5 simple sections) -- [ ] Phase 1B started (medium complexity sections) - -### Medium-Term Milestones (Next 4 Weeks) - -- [ ] All 20 sections have PRs merged -- [ ] Link resolution complete -- [ ] Cross-references updated -- [ ] Sidebars configured - -### Long-Term Milestones (Next 6 Weeks) - -- [ ] Redirects configured -- [ ] Old versioned_docs removed -- [ ] Final validation complete -- [ ] Merged to main - ---- - -## For AI Agents: Quick Context - -**Project Goal**: Migrate v4 docs from `versioned_docs/version-4.X/` → `reference_versioned_docs/version-v4/` with restructuring. - -**Your Role**: Generate initial content drafts by: - -1. Reading migration map entry for assigned section -2. Reading all source files listed (primary + additional) -3. Reading release notes for version info -4. Generating new files with inline source comments and version annotations -5. Creating link placeholders for cross-references -6. 
Creating branch, committing, opening PR - -**Key Constraints**: - -- ✅ DO add files to `reference_versioned_docs/version-v4/` -- ✅ DO include inline source comments -- ✅ DO use `TODO:path` format for link placeholders -- ✅ DO note confidence levels on version annotations -- ❌ DON'T remove anything from `versioned_docs/` yet -- ❌ DON'T add files to `reference/` (that's for v5) -- ❌ DON'T guess at version dates without noting uncertainty - -**Key Files to Reference**: - -- `v4-docs-migration-map.md` - Your primary instruction source (which files to read, where to write) -- `v4-docs-implementation-plan.md` - Detailed agent instructions (Part 1) -- `v4-docs-reference-plan.md` - Target structure and philosophy -- `release_notes/*.md` - For version annotation validation - -**PR Template**: See `v4-docs-implementation-plan.md` Part 1 for complete template. - -**Success Criteria**: - -- All files in correct location with proper structure -- Inline source comments on all content -- Version annotations with confidence levels -- Link placeholders in correct format -- Link placeholder tracker file created -- PR description complete and accurate - ---- - -## Team Assignments - -### Project Lead - -- **Name**: Ethan -- **Responsibilities**: Overall coordination, decision making, pilot execution - -### Reviewers - -_TBD after team discussion_ - -### Execution Assignments - -_To be determined after pilot phase_ - ---- - -## Notes & Learnings - -### Planning Phase Insights - -- Horizontal consolidation (v4.1→v4.7) + vertical reorganization (role-based→feature-based) are parallel transformations -- Starting with v4.7 as base and annotating backwards is more efficient than building forward from v4.1 -- Migration map revealed several complex merges (Configuration 59KB, Schema from 5+ files, Clustering 10+ files) -- Transaction/audit logging conceptually belongs with database, not application logging -- Current `reference/` folder was already partially reorganized (work in progress) - 
-### Process Improvements - -_To be filled in as we learn from pilots and execution_ - -### Template Refinements - -_To be filled in as we refine prompts based on pilot results_ - -### Common Issues - -_To be filled in as patterns emerge during execution_ - ---- - -## Change Log - -### 2026-02-18 - Project Initialization - -- Created all planning documents -- Completed migration map (20 sections, ~100+ files mapped) -- Defined reference structure and philosophy -- Established execution procedure -- Ready for team review and pilot execution - ---- - -## Future Considerations - -### Post-Migration Tasks (Out of Scope for Now) - -- Copy content from `reference_versioned_docs/version-v4/` to `reference/` to kickstart v5 -- Begin v5 documentation structure planning -- Consider automation for future minor version consolidations -- Evaluate if this approach works for v3 historical docs - -### Process Improvements for Next Time - -- Could build Agent SDK automation upfront if this approach proves successful -- Template-based content generation for consistent structure -- Automated version annotation extraction from git history -- Automated redirect generation from sitemap analysis - -### Documentation Enhancements - -- Consider adding diagrams/flowcharts to planning docs -- Video walkthrough of the process for future team members -- Automated progress dashboard from migration map status fields - ---- - -## Quick Reference - -### Directory Structure - -``` -documentation/ -├── versioned_docs/ -│ ├── version-4.1/ # Historical (source) -│ ├── version-4.2/ # Historical (source) -│ ├── version-4.3/ # Historical (source) -│ ├── version-4.4/ # Historical (source) -│ ├── version-4.5/ # Historical (source) -│ ├── version-4.6/ # Historical (source) -│ └── version-4.7/ # Latest (primary source) -├── reference_versioned_docs/ -│ └── version-v4/ # TARGET (new consolidated docs) -├── reference/ # Empty (for v5 later) -├── migration-context/ -│ └── link-placeholders/ # Per-section 
placeholder trackers -└── *.md # Planning documents -``` - -### Common Commands - -```bash -# Switch to migration branch -git checkout major-version-reorg - -# Create placeholder tracker directory -mkdir -p migration-context/link-placeholders - -# Check current status -git status - -# Create new migration branch for section -git checkout -b migration/[section-name] - -# Open PR via gh CLI -gh pr create --base major-version-reorg --title "..." --body "..." - -# Check all migration map status -grep "Status:" v4-docs-migration-map.md -``` - -### Key Metrics to Track - -- Sections complete: `X/20` -- PRs open: `X` -- PRs merged: `X` -- Link placeholders: `X created, Y resolved` -- Days elapsed: `X` -- Average review time: `X hours/PR` - ---- - -## Questions & Decisions Needed - -### Before Pilot - -- [ ] Team reviewed all planning docs? -- [ ] Reviewers assigned for pilot sections? -- [ ] GitHub tracking issue created? -- [ ] Environment setup verified? - -### After Pilot - -- [ ] Is content quality acceptable? -- [ ] Are version annotations accurate? -- [ ] Is link placeholder format working? -- [ ] Continue with VSCode or build automation? -- [ ] Any prompt refinements needed? - -### Before Scaling - -- [ ] Pilot learnings documented? -- [ ] Prompts refined based on pilot? -- [ ] Review assignments made? -- [ ] Ready to open 15-18 more PRs? - -### Before Cleanup - -- [ ] All sections merged? -- [ ] Ready to start link resolution? -- [ ] Any orphaned content to address? -- [ ] Ready to configure sidebars/redirects? - -### Before Merge to Main - -- [ ] All cleanup phases complete? -- [ ] Documentation builds successfully? -- [ ] Redirects tested? -- [ ] Final validation complete? -- [ ] Ready to remove old versioned_docs? - ---- - -**Note**: This is a living document. Update status, metrics, decisions, and learnings as the project progresses. 
diff --git a/v4-docs-project-retrospective.md b/v4-docs-project-retrospective.md deleted file mode 100644 index 30850ab6..00000000 --- a/v4-docs-project-retrospective.md +++ /dev/null @@ -1,280 +0,0 @@ -# Harper v4 Documentation Rewrite — Project Retrospective - -**Date**: 2026-03-31 -**Branch**: `major-version-reorg` -**Duration**: ~6 weeks (2026-02-18 → 2026-03-31) - ---- - -## What We Set Out to Do - -The Harper v4 reference documentation had accumulated across seven minor version folders (`versioned_docs/version-4.1/` through `versioned_docs/version-4.7/`). Each minor version was a near-complete copy of the previous with additions — so any given page existed seven times, with subtle diffs that were nearly impossible to reason about together. On top of that, the content was organized by _user role_ ("Developers", "Administration", "Deployments") rather than by _feature_, which made individual capabilities like MQTT or Static Files genuinely hard to discover. - -The project had two simultaneous transformation goals: - -**Horizontal consolidation**: Merge seven versioned folders into a single `reference/v4/` document, using inline version annotations (Node.js documentation style) to record when features were added, changed, or deprecated across minor versions. - -**Vertical reorganization**: Restructure content from role-based groupings to a flat, feature-based hierarchy where each Harper built-in plugin or capability is a top-level section immediately visible in the sidebar. - -An additional constraint: the old `/docs/` URL space had been live for years, with backlinks across the internet and real traffic measured in Google Analytics. Every page needed a redirect to its new location — and the redirect mapping needed to be driven by data, not guesswork. 
- ---- - -## Planning Phase (2026-02-17 → 2026-02-19) - -### The Research Foundation - -Before any migration tooling or structure was created, manual research was done to map the evolution of Harper features across all seven minor versions. This is documented in [v4-docs-research.md](./v4-docs-research.md), which walks through what changed at each version from v4.1 to v4.7. - -Notable findings from this research: - -- The role-based navigation (`Developers` / `Administration`) had silently broken in v4.4 when [PR #303](https://github.com/HarperFast/documentation/blob/ade07fd9428b0321c047ac8243ad1106bb0de2a8/versioned_sidebars/version-4.4-sidebars.json) restructured developer onboarding and removed the `developers/` tab from the sidebar — those paths existed but were invisible for ~4 months. -- The evolution of "Custom Functions → Components → Applications/Extensions → Plugins" was one of the trickiest naming threads to track, since AI-generated timelines kept getting confused by the naming history. -- Transaction logging and audit logging had historically lived under `logging/` but conceptually belonged in `database/` — this was one of several reorganization decisions made during research. - -An AI-generated feature history file ([v4-feature-history-ai-gen.md](./v4-feature-history-ai-gen.md)) was also produced but flagged explicitly as "use with caution" — AI struggled with the naming evolution and the research notes reflect that the human was better positioned to piece it together. 
- -### The Plan Documents - -On **2026-02-18**, commit [`78eca4be`](https://github.com/HarperFast/documentation/commit/78eca4bed4630fd81f8f9328c7ed7e0e603a9589) created five planning documents in one shot (4,487 lines across 7 files): - -- **[v4-docs-project-brief.md](./v4-docs-project-brief.md)** — executive summary, status dashboard, key decisions log, team assignments -- **[v4-docs-reference-plan.md](./v4-docs-reference-plan.md)** — target structure philosophy, version annotation strategy, the full reference outline (directory tree), redirect philosophy -- **[v4-docs-migration-map.md](./v4-docs-migration-map.md)** — file-by-file mapping from old paths to new paths, with primary sources, additional sources, and merge requirements for each page -- **[v4-docs-implementation-plan.md](./v4-docs-implementation-plan.md)** — agent instructions, PR template, link placeholder format, section ordering -- **[v4-docs-research.md](./v4-docs-research.md)** — manual research notes (pre-existing, also committed here) - -Key architectural decisions made during planning: - -1. **Feature-first organization**: Stop grouping by "Developers" / "Administration". Make every Harper capability (CLI, MQTT, Static Files, Components, etc.) a top-level sidebar section. This mirrors how Stripe, Node.js, and other API docs are organized, and more accurately maps to how Harper is actually built — around plugins and features. - -2. **`overview.md` instead of `index.md`**: Following the Learn section pattern, reference sections use non-collapsible sidebar headers with an explicit `overview.md` at the top. No hidden index pages. - -3. **Primary vs. secondary reference pattern**: For features that span multiple sections (like Operations APIs), there's one exhaustive primary reference that other sections link to with only quick-reference summaries. Prevents duplication while maintaining discoverability. - -4. 
**Inline version annotations**: Node.js-style annotations (`Added in: v4.3.0`) placed inline in the content, not in YAML frontmatter. Confidence levels were required — agents had to distinguish `(confirmed via release notes)` from `(inferred from version comparison, needs verification)`. - -5. **`TODO:path` link placeholders**: Since 20 sections were being written in parallel across PRs, cross-section links couldn't be real until after all sections existed. The format `[Text](TODO:reference_versioned_docs/version-v4/section/page.md 'description')` was chosen for easy grep/replace in a later cleanup pass. - -6. **AI-first, human-review workflow**: AI agents (Claude Code in VSCode) do initial content generation from the source files; humans review, edit, and merge. Not fully automated — visibility and quality control were prioritized over speed. - -7. **Target directory**: Content goes to `reference_versioned_docs/version-v4/` first (not `reference/`), with a later copy step to `reference/` to kickstart v5. This kept v5 concerns out of scope. - -On **2026-02-19**, commit [`241f8cbe`](https://github.com/HarperFast/documentation/commit/241f8cbeab330140999a045c5db6e3b4eadf08d8) configured the build system for the migration branch: - -- Temporarily disabled the local search plugin -- Set `onBrokenLinks: 'warn'` (would throw in production; needed to allow incremental builds during migration) -- Added redirect page infrastructure -- The site now built successfully, ready for migration PRs - -Also in this commit: `scripts/harper-docs-analytics.csv` — 1,635 rows of Google Analytics pageview data (Oct 2025 – Feb 2026) that would later drive the redirect priority decisions. - -A `scripts/analyze-pageview-data.mjs` script was also created to process the CSV and surface the top-trafficked paths. - ---- - -## Content Migration Phase (2026-02-23 → 2026-03-27) - -The migration was structured into five phases based on complexity. 
Each section was a separate PR merged into `major-version-reorg`, with Claude Code generating initial content from the source versioned files. - -### Phase 1A — Simple, Stable Sections - -| Section | PR Merged | Commit | -| ---------------- | ---------- | --------------------------------------------------------------------------------------------------------- | -| CLI | 2026-02-23 | [`021d8000`](https://github.com/HarperFast/documentation/commit/021d80004f8a3b8be9d2be9faecbc33ca583e30d) | -| GraphQL Querying | 2026-02-24 | [`af96a726`](https://github.com/HarperFast/documentation/commit/af96a726203b35952583bd3eba6e226c419cb7a5) | -| Studio | 2026-02-24 | [`2c599700`](https://github.com/HarperFast/documentation/commit/2c599700eb40ab9ea9c91587e270026018515fc2) | -| Fastify Routes | 2026-02-24 | [`c6c99e5f`](https://github.com/HarperFast/documentation/commit/c6c99e5f6a94901bae80bdc98524bce7fd82dbce) | - -### Phase 1B — Medium Complexity - -| Section | PR Merged | Commit | -| --------------------- | ---------- | --------------------------------------------------------------------------------------------------------- | -| Environment Variables | 2026-02-25 | [`cd47bee3`](https://github.com/HarperFast/documentation/commit/cd47bee3d2bc5e48c4fe88d5b7f56bb9a5b1c20f) | -| HTTP | 2026-02-26 | [`fa4d2f38`](https://github.com/HarperFast/documentation/commit/fa4d2f38db2c6668dc336700375f2528ee36477b) | -| Static Files | 2026-03-02 | [`2d5d2939`](https://github.com/HarperFast/documentation/commit/2d5d2939003f612ff3f773c14b68fd0a5b217fc6) | -| Logging | 2026-03-04 | [`5271417c`](https://github.com/HarperFast/documentation/commit/5271417cb87021a21f078584b95d729e2d37aad9) | -| Analytics | 2026-03-10 | [`5fa17671`](https://github.com/HarperFast/documentation/commit/5fa176712840179889e16adb64e2cfe2c4deade7) | -| MQTT | 2026-03-11 | [`e46a359f`](https://github.com/HarperFast/documentation/commit/e46a359f2b0b6d9d9e08a80bfe84dadf19e80d95) | - -### Phase 1C — Complex Sections - -| Section | 
PR Merged | Commit | -| ------------------------ | ---------- | --------------------------------------------------------------------------------------------------------- | -| Security + Users & Roles | 2026-03-17 | [`37580219`](https://github.com/HarperFast/documentation/commit/3758021962bc06ccd8e4ebaef5aea4cd4e7173a2) | -| REST | 2026-03-18 | [`ac8b9c90`](https://github.com/HarperFast/documentation/commit/ac8b9c90fb32e48a2e3eec05e86831d9cb3e0ebe) | -| Database | 2026-03-26 | [`3508aabc`](https://github.com/HarperFast/documentation/commit/3508aabcf6da255b696100710d2f1e68ccea02c0) | -| Resources | 2026-03-26 | [`625fa2b6`](https://github.com/HarperFast/documentation/commit/625fa2b615e6079bf4b082100c10b2bdedd67174) | -| Components | 2026-03-27 | [`7359fcbb`](https://github.com/HarperFast/documentation/commit/7359fcbb9c1b1d5d24ef0b65f0f1b1be8d7e1963) | -| Replication | 2026-03-27 | [`ef09307e`](https://github.com/HarperFast/documentation/commit/ef09307e382a49b743aefee3a4ec0caa23665033) | - -### Phase 1D — Cross-Cutting Sections - -| Section | PR Merged | Commit | -| -------------- | ---------- | --------------------------------------------------------------------------------------------------------- | -| Operations API | 2026-03-27 | [`4f7fc1e0`](https://github.com/HarperFast/documentation/commit/4f7fc1e03eb6dd99cff69c28fc4f8117afac67c4) | -| Configuration | 2026-03-27 | [`ffc57e0d`](https://github.com/HarperFast/documentation/commit/ffc57e0d2bdf4b811d951d1a6015486433727549) | - -### Phase 1E — Legacy Content - -Added during migration in commit [`92ef6d5b`](https://github.com/HarperFast/documentation/commit/92ef6d5bc29c4b387261f6ab1fc6f6152d2dacb8): - -- `legacy/cloud.md` — Harper Cloud landing page directing to Fabric -- `legacy/custom-functions.md` — what Custom Functions were; points to Components -- `database/sql.md` — SQL is documented content, not just a deprecation notice, so it got a real page rather than a legacy stub - -### Adaptations from the Original Plan 
- -Several sections evolved during migration: - -- **Security split**: RBAC content was broken out from `security/` into its own top-level `users-and-roles/` section. The breadth of content (operations API, config file roles, permission structure) warranted its own section. -- **HTTP TLS page**: `http/tls.md` was added during migration — TLS config warranted its own page beyond what the plan specified. -- **Components JS environment**: `components/javascript-environment.md` was added to capture JS globals (server, logger, etc.) that didn't fit cleanly elsewhere. -- **Environment Variables no config page**: `environment-variables/configuration.md` was not created — the content was ported directly into `configuration/overview.md` instead. -- **Database API page**: `database/api.md` was added for JS globals (`tables`, `databases`, `transaction()`, `createBlob()`) that didn't have a clear home in the original plan. -- **Resources global APIs not created**: `resources/global-apis.md` was skipped because that content was covered by `components/javascript-environment.md`. - ---- - -## Link Resolution Phase (2026-03-30) - -Once all 20 sections were merged, all `TODO:path` placeholders were resolved in a single PR: - -- **Link Resolution PR #467** — commit [`dd8fc4fe`](https://github.com/HarperFast/documentation/commit/dd8fc4feddf047dcaceadacc0a8043c54cca62ae) - -This was done section-by-section, resolving placeholders by scanning the actual files that now existed and replacing `TODO:path` strings with real relative paths. The per-section tracker files in `migration-context/link-placeholders/` were deleted after the PR merged. - -**Cross-reference updates PR #468** — commit [`13e1f53b`](https://github.com/HarperFast/documentation/commit/13e1f53bb59bc49553bbf42ad7b8e7bd4f50cb36) updated old `/docs/` links in both release notes and learn guides to point to the new `/reference/v4/` paths. 
- ---- - -## Redirect Strategy (2026-03-30) - -### The Input Data - -The redirect work was driven by two inputs: - -1. **Google Analytics CSV** (`scripts/harper-docs-analytics.csv`, committed in [`fb672f4b`](https://github.com/HarperFast/documentation/commit/fb672f4bec334e05e64b1da87a1f466b8d8aff27)) — 1,635 rows of pageview data from October 2025 through February 2026. This gave traffic volumes for every old path. - -2. **`scripts/analyze-pageview-data.mjs`** — a script to process the CSV and rank paths by visit count, committed alongside the analytics data. - -The planning for redirects was documented in [memory/part5-redirects.md](./memory/part5-redirects.md), which contains: - -- The full new URL structure (`/reference/v4/[section]/[page]`) -- Annotated list of every old path with visit counts (50+ views) and their mapped targets -- Paths explicitly identified as needing no new redirect (install guides, `/learn/`, `/fabric/`) -- Notes on the old `redirects.ts` issues (stale `withBase()` abstraction, very old HarperDB-era rules) - -### The Tier System - -Redirects were prioritized by traffic volume: - -- **High priority (>200 views)**: 17 paths — explicit per-path redirects with comments -- **Medium priority (50–200 views)**: ~40 paths — explicit redirects -- **Low traffic (<50 views)**: Catch-all patterns rather than individual rules -- **Versioned paths (`/docs/4.X/...`)**: Low traffic across the board — catch-all redirect to `/reference/v4/` - -Notable redirect decisions: - -- `/docs/` root (2,854 views) → `/` (site root) -- `/docs/developers/applications/caching` (410 views) → `/reference/v4/resources/overview` (with a comment noting this should eventually point to a dedicated Learn guide) -- `/docs/reference/globals` (277 views) → `/reference/v4/components/javascript-environment` (the globals page became the JS environment page) - -### The Output - -Commit 
[`5e84ecf0`](https://github.com/HarperFast/documentation/commit/5e84ecf03a583129c5b752e79369b94d5c4d4691) (2026-03-30) — "finish redirects": - -- **`redirects.ts`** — rewritten (469 lines, net +153): non-versioned `/docs/*` paths → new `/reference/v4/` paths -- **`historic-redirects.ts`** — new file (1,811 lines): versioned `/docs/4.X/*` paths → new paths -- **`scripts/pageview-data-test.js`** — 215-line test script to validate redirect coverage against the analytics data -- **`CONTRIBUTING.md`** — added notes on the `docusaurus serve` bug with `4.X` paths (the `serve-handler` bug that treats `4.6` as a file extension) and the patch procedure - ---- - -## Old Content Deletion and Final Wiring (2026-03-30) - -Three commits on the same day completed the transition: - -1. **[`99bf4d81`](https://github.com/HarperFast/documentation/commit/99bf4d819d64604c8ebbda49153ca147f29ac96c)** — "checkpoint before deleting old content files" — final snapshot before deletion - -2. **[`48764459`](https://github.com/HarperFast/documentation/commit/487644598ddf55344c3c1c0e908ebabeeb4c84b4)** — "delete old docs content" — removed the entire `docs/` tree: `docs/administration/`, `docs/deployments/`, `docs/developers/`, `docs/reference/`, etc. (~34 files, thousands of lines) - -3. 
**[`0ebea43a`](https://github.com/HarperFast/documentation/commit/0ebea43acfc82215ff5d44d14ee8d40922bf4f63)** — "copy new content to reference/" — copied the finalized content from `reference_versioned_docs/version-v4/` into `reference/` to serve as the v5 starting point (as planned from the beginning) - -Additional cleanup on the same day: - -- [`7c62241a`](https://github.com/HarperFast/documentation/commit/7c62241afc25a184a1a0c7a82adc0c7acec12272) — removed paginator from reference section -- [`9aee72d7`](https://github.com/HarperFast/documentation/commit/9aee72d714cb458c7622f4b9f8bfa9f5cf67251a) — format pass -- [`256de664`](https://github.com/HarperFast/documentation/commit/256de664cf4526bfc78c7a82d259db929f84845f) — re-enabled `onBrokenLinks: 'throw'` (the temporary `'warn'` setting from the planning phase was finally reverted) - ---- - -## What the Final Structure Looks Like - -The new reference lives at `/reference/v4/` with 20 top-level sections, each with an `overview.md` and additional pages: - -``` -reference/v4/ -├── analytics/ (overview, operations) -├── cli/ (overview, commands, authentication, operations-api-commands) -├── components/ (overview, applications, extension-api, javascript-environment, plugin-api) -├── configuration/ (overview, options, operations) -├── database/ (overview, schema, api, data-loader, storage-algorithm, jobs, system-tables, compaction, transaction, sql) -├── environment-variables/ (overview) -├── fastify-routes/ (overview) -├── graphql-querying/ (overview) -├── http/ (overview, configuration, api, tls) -├── legacy/ (cloud, custom-functions) -├── logging/ (overview, configuration, api, operations) -├── mqtt/ (overview, configuration) -├── operations-api/ (overview, operations) -├── replication/ (overview, clustering, sharding) -├── resources/ (overview, resource-api, query-optimization) -├── rest/ (overview, querying, headers, content-types, websockets, server-sent-events) -├── security/ (overview, basic-authentication, 
jwt-authentication, mtls-authentication, certificate-management, certificate-verification, configuration, api) -├── static-files/ (overview) -├── studio/ (overview) -└── users-and-roles/ (overview, configuration, operations) -``` - -The `versioned_docs/version-4.X/` folders were removed. The seven-version structure is gone. Version history for any feature is now expressed inline within the single v4 reference using version annotations. - ---- - -## Notable Technical Decisions and Tradeoffs - -### Why AI Agents, Not Pure Automation - -The original plan considered building an Agent SDK pipeline to fully automate migrations. The decision was made to use Claude Code in VSCode instead — providing visibility at each step and allowing human intervention on each PR. The project brief ([v4-docs-project-brief.md](./v4-docs-project-brief.md#key-decisions-log)) explicitly called out: "Provides visibility and control; can pivot to automation if needed." - -In practice, this meant each section still had a "manual review" commit before merging. Examples: [`253d3aae`](https://github.com/HarperFast/documentation/commit/253a3eae), [`55432eaa`](https://github.com/HarperFast/documentation/commit/55432eaa), [`c7286b58`](https://github.com/HarperFast/documentation/commit/c7286b58). - -### The `--fixup` Commit Strategy - -Migration branches used `git commit --fixup ` for corrections to keep the development history clean while allowing easy squashing. This is described in the implementation plan and visible in the commit history of individual migration branches before squash-merge. - -### Preview Deployments for the Migration Branch - -Commit [`296064fd`](https://github.com/HarperFast/documentation/commit/296064fd05761486bc2861ccc6cae12a68ca6190) updated GitHub Actions workflows to generate preview deployments for PRs against `major-version-reorg` (not just PRs against `main`). This allowed reviewing each section in the rendered site before merging. 
- -### The `docusaurus serve` / `4.X` Path Bug - -While testing the final historic redirects, a bug was discovered in `serve-handler` (a Vercel library used by `docusaurus serve`) where directory paths ending in a number like `4.6` are treated as files rather than directories (because `4.6` looks like a file extension). This caused 404s on all `/docs/4.X/` paths locally. The fix required patching `node_modules/serve-handler/src/index.js` — the patch instructions were added to `CONTRIBUTING.md` in commit [`5e84ecf0`](https://github.com/HarperFast/documentation/commit/5e84ecf03a583129c5b752e79369b94d5c4d4691). An upstream PR was filed at https://github.com/vercel/serve-handler/pull/230. - -### Redirect Testing with Real Analytics Data - -Rather than manually checking redirects, a `scripts/pageview-data-test.js` script (215 lines, added in the "finish redirects" commit) validates redirect coverage against the actual analytics CSV. This makes the redirect file auditable — you can run the test and see which high-traffic paths have explicit redirects vs. fallthrough. 
- ---- - -## By the Numbers - -- **Duration**: ~6 weeks (Feb 18 – Mar 31, 2026) -- **Sections migrated**: 20 -- **PRs merged (migration)**: ~20 section PRs + link resolution + cross-references -- **Source version folders eliminated**: 7 (`version-4.1` through `version-4.7`) -- **Old `docs/` files deleted**: ~100+ files across administration, deployments, developers, and reference subdirectories -- **New reference pages created**: ~60+ files -- **Redirects configured**: ~150+ explicit rules in `redirects.ts` + 1,811 lines of versioned path rules in `historic-redirects.ts` -- **Analytics paths analyzed**: 1,635 rows of pageview data used to prioritize redirect targets -- **Planning documents written**: 5 documents (~4,500 lines) before a single migration PR was opened diff --git a/v4-docs-reference-plan.md b/v4-docs-reference-plan.md deleted file mode 100644 index 983c3711..00000000 --- a/v4-docs-reference-plan.md +++ /dev/null @@ -1,379 +0,0 @@ -# Harper v4 Reference Docs Plan - -## Summary - -This plan addresses two major transformations of the Harper v4 documentation: - -**Horizontal Consolidation**: Merging versioned documentation from v4.1 through v4.7 into a single comprehensive v4 reference, using inline version annotations (similar to Node.js docs) to track when features were added, changed, or deprecated across minor versions. - -**Vertical Reorganization**: Restructuring the documentation from role-based categories ("Developers," "Administration") to a flat, feature-based organization where Harper's core capabilities (CLI, Database, REST, MQTT, Components, Security, etc.) are prominently displayed as top-level sections. - -### Key Strengths - -**Feature-First Organization**: Core features like Static Files, Environment Variables, and MQTT become immediately discoverable as top-level sections rather than buried in nested hierarchies. 
This mirrors how successful API documentation (Stripe, Node.js) is structured and reflects how Harper is actually built - around plugins and features. - -**Primary vs Secondary Reference Pattern**: Complex features that span multiple concerns (like Operations APIs used across different subsystems) have one exhaustive "primary" reference with other sections providing "quick reference" links. This solves discoverability without creating maintenance nightmares from duplication. - -**Non-Collapsible Navigation**: Following the Learn section's pattern, all reference sections are visible immediately in the sidebar using `overview.md` files instead of hidden index pages. Users can visually scan the full feature list without clicking to expand nested sections. - -**Pragmatic Legacy Handling**: Deprecated features (SQL, Custom Functions, Cloud) are moved to a dedicated `legacy/` section without extensive reorganization. This respects the evolution of Harper v4 while steering users toward current best practices. - -**Intelligent Redirect Strategy**: Using sitemap analysis, Google Analytics data, and AI assistance to ensure existing documentation URLs remain functional, prioritizing the most-visited pages for perfect redirects while handling the long tail with catch-alls. - -**Separation of Features from Concepts**: The plan distinguishes between standalone features (Components, Logging, REST) and cross-cutting concepts (caching, web applications) that are better documented as aspects of features or covered in Learn guides rather than separate reference sections. - -This reorganization will significantly improve Harper documentation maintainability going forward, make v4 capabilities more discoverable to new users, and establish a solid foundation for v5 documentation. - ---- - -The primary goal of this reorganization is to highlight key features of Harper and then pack as much information into it as possible. 
Thus the primary reorganization point is to no longer arbitrarily sort pages by attributes or tags like "developers" or "administration" and instead flatten out the structure and present content based on the most relevant feature. - -We can generally follow a lot of what exists today as well as loosely use Harper's built-in feature list as a starting point. Any built-in plugin is really a core feature. Users shouldn't have to navigate through nested sections and pages to discover that Harper can support static file hosting. Static Files is a core feature and should be prominently displayed. More examples include: CLI, Configuration, Operations API, Security, Components, REST, Database, Resources, Environment Variables, (proper list in the outline below). - -There will of course be some overlap, but by organizing by top-level feature we can ideally capture the core information for that feature all in one place. Then other sections that may overlap can link to the core reference while still providing some high-level information. If we want to get really fancy we can use MDX components or even store doc information in JSON and reference it programmatically throughout the section to ensure consistency across references. - -For example, a lot of features have relative Operations APIs associated with them, such as `deploy_component`. The core `deploy_component` operation will be primarily documented in `reference/components/operations`. This will contain exhaustive information on the operation including all options, examples, edge cases, version history, etc. The same operation will also be listed on the `reference/operations-api/operations` page, but with only "necessary" or "quick" details such as description and options. This will then link out to the primary documentation reference for the operation in case users need more than a "quick" look. We'll utilize this pattern so that no matter how a user navigates the docs they should find what they are looking for. 
- -Now obviously this could create synchronization issues if someone forgets to update the docs in one place but not the other. This is why things should only have one "primary" reference and be linked to from other "secondary" or "quick" references. - -## Difference between a feature and concept - -While we often advertise "caching" as one of Harper's key features, the reality is caching is a concept or aspect of other core features. Caching on Harper makes no sense to someone unfamiliar with our Resource API. So instead of having a dedicated top-level "Caching" section in the reference, we should concentrate on documenting the parts of the resource API and schema system that enable caching. Then accompany this with appropriate Learn guides that do focus on implementing certain caching examples. - -Similarly, web applications are a feature of a variety of built-in and custom plugins. - -## Deprecated/Legacy Content - -There have been many changes over the course of v4, and many more to come in v5. Nonetheless, since we are collapsing documentation into major versions, we need to do something with content that is only relevant to a previous minor. Keep in mind that as long as we follow strict semver for our versioning, then we'll never have to deal with documenting a _removed_ feature in any singular major. I'm not necessarily going to solve for that circumstance because it really shouldn't happen. - -However, we have historically deprecated or discouraged use of certain features over the course of a major version (custom functions, sql) while still maintaining support. We need a place to document these features less-prominently than active features so that we can continue to direct users in the right direction. - -I believe this may be the only circumstance to make an exception to the general feature-based organization strategy (sorta). These legacy / deprecated / discouraged features should be nested within a top-level "Legacy" section.
They can still be nested sections themselves, and potentially this is the one place we'd break the no-collapsing rule. The existing `/docs/reference/sql-guide` and `/docs/4.1/custom-functions/*` sections contain many pages. We really do not need to waste time rewriting or organizing this information. The simplest solution is to just take what exists and move it to a new `/reference/legacy/` sub path. - -## Index.md vs Overview.md - -One issue that has made our documentation confusing is the deep nesting of content. Users have to click many times to expand all the side-nav sections to potentially find the title they are looking for. - -Furthermore, a lot of nested sections have index pages that may or may not contain important information. An index page is the page you see when clicking on a nested section title. It's not always clear that these nested section titles are even clickable. - -As the Learn section demonstrates, section titles should be non-collapsible and not clickable. However, reference docs generally benefit from some sort of an overview section for general information and what not. As a result we have a choice: continue to use index pages (and iterate on making them more intuitively discoverable) or switch to an `overview.md` file that always exists at the top of any reference section. - -This doc map assumes that we'd match the non-collapsible section headers like the Learn section has. -All sections and the docs within would be visible immediately. - -This means no "index" pages as sometimes users don't realize it exists along with the nested content. -What would traditionally be an index page should now go into `overview.md`. - -We may experiment with the index page pattern and compare/contrast, but I believe (based on the style and experience of the learn section), that this structure is least confusing.
- -## Scope and Flexibility of the Outline - -The reference section outline below represents our best understanding of Harper v4's feature landscape based on documentation analysis from v4.1 through v4.7. However, it's important to note that this map intentionally walks a line between completeness and manageability. - -**This is a living guide, not a rigid specification.** As implementation progresses, we expect to: - -- **Discover additional pages or subsections** that make sense to add as we work through actual content migration -- **Consolidate pages** that turn out to have less content than anticipated -- **Split pages** that become unwieldy into multiple focused documents -- **Adjust organization** based on cross-referencing patterns that emerge during writing - -**The map intentionally avoids overwhelming detail** in some areas. For example, MQTT configuration and security features (like mTLS) are noted but not broken into extensive subsections, even though they might warrant dedicated pages during implementation. Similarly, some features with significant cross-cutting concerns (security, configuration) are kept streamlined in the outline but will naturally expand to reference related sections throughout the docs. - -**Feature-specific configuration and operations pages may fluctuate.** While some features clearly need dedicated configuration pages (like `logging/configuration.md`), others might fold configuration details into their overview or have configuration sufficiently covered in the central `configuration/options.md` page. These decisions will become clearer as we write the actual content. - -The goal is to provide enough structure to guide implementation while remaining flexible enough to adapt to what we learn along the way. - -## Version Annotations Strategy - -Since we're consolidating v4.1 through v4.7 into a unified v4 reference, we need a consistent way to annotate when features were introduced, changed, or deprecated across minor versions. 
This follows the Node.js documentation pattern of inline version history. - -### Annotation Patterns - -**For new features:** - -```markdown -## Relationships - -Added in: v4.3.0 - -The `@relation` directive allows you to define relationships between tables... -``` - -**For changed features:** - -```markdown -### Auto-increment Primary Keys - -Changed in: v4.4.0 - -Primary keys can now auto-increment when defined as `Any`, `Int`, or `Long` types. -In previous versions, only GUIDs were supported for `ID` and `String` types. -``` - -**For deprecated features:** - -```markdown -## SQL Querying - -Deprecated in: v4.2.0 (moved to legacy in v4.7+) - -SQL querying is still supported but discouraged. Consider using the REST API -or custom resources for querying data. See [Database](../database/overview.md) -for modern alternatives. -``` - -**For configuration options:** - -```markdown -## Logging Configuration - -### `logger.level` - -- Type: `string` -- Default: `"info"` -- Added in: v4.1.0 - -### `logger.per_component` - -- Type: `object` -- Default: `{}` -- Added in: v4.6.0 - -Allows granular logging configuration per component or plugin. -``` - -### Annotation Guidelines - -- Use simple text annotations for now (no YAML frontmatter) -- Place version info prominently at the start of sections -- For minor changes within a feature, inline the version info with the specific detail -- Always indicate both when something was added AND when it changed significantly -- For deprecated features, provide guidance on modern alternatives -- When documenting operations APIs or configuration, include version info in tables/lists -- Focus on minor version (v4.3.0) unless a patch introduced the feature, then include patch (v4.3.2) - -### Building Version History - -When migrating content: - -1. Start with v4.7 documentation as the base (most current) -2. Compare with earlier versions (v4.6 → v4.5 → ... → v4.1) to identify when features appeared -3. 
Use release notes to validate feature introduction versions -4. Use git diff between version folders to catch subtle changes -5. Annotate as you build rather than trying to add annotations retroactively - -This approach ensures we preserve the evolution of Harper v4 while maintaining a single, coherent reference that serves users across all v4 minor versions. - -## Reference Section Outline - -``` -reference/ -├── cli/ -│ ├── overview.md # High-level overview of the Harper CLI. -│ │ # Include details such as general args, auth, -│ │ # and provide a list of all available commands -│ │ # with links to their appropriate detailed section -│ │ # (in the other pages). -│ │ -│ ├── commands.md # Detailed reference for each (non-operations api) CLI -│ │ # command including light examples. Remember to link to -│ │ # Learn section guides for more in-depth examples. -│ │ -│ ├── operations-api-commands.md # Detailed reference for each Operations API CLI command. -│ │ # Even if it may seem repetitive (with the actual respective -│ │ # operations api section), each command should clearly detail -│ │ # itself including description and available arguments. -│ │ -│ └── authentication.md # (Optional) Specific reference for CLI authentication -│ -├── configuration/ -│ ├── overview.md # High-level overview of Harper configuration, such as -│ │ # the `harper-config.yaml` file, configuration mechanisms, -│ │ # and maybe some architecture notes such as how some core -│ │ # features will require restarts, but other changes wont. -│ │ -│ ├── options.md # List/table of all options. include brief descriptions and -│ │ # any necessary info like data types and defaults. -│ │ # Keep in mind that features will contain their own config -│ │ # reference doc, and so this section should link out to the -│ │ # relative detailed docs. -│ │ -│ └── operations.md # List/table of all operations related to managing configuration -│ # in detail. 
-│ -├── operations-api/ -│ ├── overview.md # High-level info on operations api including basics like request -│ │ # shape and bonus features like health and open api endpoints. -│ │ # Should include authentication info, and link to the specific -│ │ # security pages for more details. -│ │ -│ └── operations.md # A complete simplified list of all operations that links out to -│ # specific sections for more details beyond short description and -│ # option data types. -│ -├── security/ -│ ├── overview.md # Deserves its own section since security is cross-feature and it -│ │ # can encompass pages on the specific security related operations -│ │ # and plugins like `tls`, JWT, and cert management. -│ │ # Many other sections will link to here when mentioning auth. -│ │ # The existing security section does a really excellent job of -│ │ # organization information. -│ │ -│ ├── basic-authentication.md # Basic auth mechanism details -│ │ -│ ├── jwt-authentication.md # JWT auth mechanism details -│ │ -│ ├── mtls-authentication.md # mTLS auth mechanism details -│ │ -│ ├── certificate-management.md # Certificate management details -│ │ -│ ├── certificate-verification.md # Certificate verification (OCSP, etc.) -│ │ -│ └── configuration.md # Authentication configuration (authorizeLocal, cacheTTL, -│ # enableSessions, token timeouts, hashFunction). Top-level -│ # `authentication:` section of harperdb-config.yaml. -│ -├── users-and-roles/ # Broken out from security/ during migration — RBAC warrants -│ │ # its own top-level section given the breadth of content -│ │ # (operations API, config file roles, permission structure). -│ │ -│ ├── overview.md # RBAC intro, roles, permission structure, operation -│ │ # restrictions reference table -│ │ -│ ├── configuration.md # Config file roles (roles.yaml), password hashing -│ │ -│ └── operations.md # Operations API: all role and user operations -│ -├── components/ -│ ├── overview.md # What are components? 
Evolution from custom functions to -│ │ # components to applications/extensions to plugins. -│ │ -│ ├── applications.md # Application component details and API -│ │ -│ ├── extension-api.md # Extension API reference -│ │ -│ └── plugin-api.md # Plugin API reference -│ -├── database/ -│ ├── overview.md # Explain how Harper's data system is powered by Resources, but you don't -│ │ # necessarily have to build custom resources to utilize the database system. -│ │ # Detail how a lot is achievable using the schema system and auto rest api. -│ │ -│ ├── schema.md # `graphqlSchema` plugin and the schema system. Including detailed api info -│ │ # on the available directives and data types for schemas. likely a long page. -│ │ # Can optionally break some parts out into their own pages like "blobs" and -│ │ # "vector" as exists today. -│ │ -│ ├── data-loader.md # `dataLoader` plugin reference -│ │ -│ ├── storage-algorithm.md # Storage algorithm details -│ │ -│ ├── jobs.md # Bulk data and jobs operations -│ │ -│ ├── system-tables.md # Harper system tables for variety of features -│ │ -│ ├── compaction.md # Storage compaction and compression details -│ │ -│ └── transaction.md # Transaction logging details -│ -├── resources/ -│ ├── overview.md # Split off from previous "data/" section since resources are generally for -│ │ # custom implementations. The previous section is all schema and data stuff. -│ │ # This one is all about building custom resources including the jsResource -│ │ # plugin and global apis. Likely easiest to doc the plugin in this page and -│ │ # use other pages for the api reference. -│ │ -│ ├── resource-api.md # Currently the resource api is split into two separate reference files that -│ │ # are very similar but with the `loadAsInstance` thing have different signatures. -│ │ # Easiest to stick to that model until we can simplify in future majors. -│ │ -│ ├── global-apis.md # `tables`, `databases`, `transactions` etc. 
-│ │ # `server` has its own section so mention and link. -│ │ -│ └── query-optimization.md # Query optimization details and best practices -│ -├── environment-variables/ -│ ├── overview.md # `loadEnv` plugin overview and usage -│ │ -│ └── configuration.md # Environment variable configuration options -│ -├── static-files/ -│ ├── overview.md # `static` plugin overview and usage -│ │ -│ └── configuration.md # Static file serving configuration options -│ -├── http/ -│ ├── overview.md # HTTP server overview and architecture -│ │ -│ ├── configuration.md # `http` configuration options -│ │ -│ └── api.md # `server` global API reference -│ -├── rest/ -│ ├── overview.md # `rest` plugin and the overall system as it interacts -│ │ # with things like schemas and custom resources. -│ │ -│ ├── querying.md # REST querying syntax and capabilities -│ │ -│ ├── headers.md # HTTP headers used by REST API -│ │ -│ ├── content-types.md # Supported content types (JSON, CBOR, MsgPack, CSV) -│ │ -│ ├── websockets.md # WebSocket support via REST plugin -│ │ -│ └── server-sent-events.md # Server-Sent Events (SSE) support -│ -├── mqtt/ -│ ├── overview.md # MQTT plugin overview, configuration, and usage -│ │ -│ └── configuration.md # MQTT-specific configuration options -│ -├── logging/ -│ ├── overview.md # Logging system overview and architecture -│ │ -│ ├── configuration.md # Logging configuration options (per-component, granular, etc.) 
-│ │ -│ ├── api.md # Logger global API reference -│ │ -│ └── operations.md # Logging-related operations API -│ -├── analytics/ -│ ├── overview.md # Analytics system overview (resource/storage analytics, system tables) -│ │ -│ └── operations.md # Analytics-related operations -│ -├── replication/ -│ ├── overview.md # Replication system overview (native replication, Plexus) -│ │ -│ ├── clustering.md # Clustering configuration and management -│ │ -│ └── sharding.md # Sharding configuration and strategies -│ -├── graphql-querying/ -│ └── overview.md # GraphQL querying feature (experimental/incomplete) -│ -├── studio/ -│ └── overview.md # Studio documentation (still ships with v4 but moving to legacy) -│ -├── fastify-routes/ -│ └── overview.md # Fastify routes documentation (discouraged in favor of components) -│ -└── legacy/ - ├── cloud/ # Legacy cloud documentation (replaced by Fabric) - │ - ├── custom-functions/ # Custom functions (deprecated in favor of components) - │ - └── sql/ # SQL guide (discouraged) -``` - -## Redirects - -One major concern with modifying the `/docs/` path is we've used this for many years for our documentation content. It is safe to assume that many backlinks to these pages exist across the internet. From our own content, to external posts written by community members. Thus, we must have a detailed plan for supporting these paths as we migrate to a new structure. - -We can start by analyzing the docusaurus generated sitemap for all existing paths today. Then, using Google Analytics data for paths visited, we can find out what paths have been navigated to since we enabled analytics in October 2025. And finally, we can look to the existing redirects.ts file to understand what redirects have been created so far. - -With a little help from AI, we can use these inputs to create a historical site map of paths we must ensure are redirected. 
With the analytics data, we can even understand exactly which paths are visited most frequently and depending on the volume, focus our efforts on redirecting the top N% of pages versus creating perfect redirects for all thousand pages. - -That said, AI is also _pretty_ good at automating a redirect map too. - -There is really a plethora of solutions here ranging from least to most effort and user experience. All paths must have a redirect, but effort is generally determined by how many of them will have "perfect redirects" to the most relevant content versus being included in a catch-all and how easy it will be to maintain the redirects over time. And furthermore, many of our pages are duplicated across v4.2 to v4.7. If we assume we can safely redirect all of those duplicate pages, then the problem set significantly reduces. diff --git a/v4-docs-research.md b/v4-docs-research.md deleted file mode 100644 index a41c57f8..00000000 --- a/v4-docs-research.md +++ /dev/null @@ -1,328 +0,0 @@ -# v4 Docs Map - -The point of this is to figure out the documented evolution of Harper features and subsystems from v4.1 to v4.7 in order to create a holistic solution for reorganized singular major v4 versioned docs. - -This is aligned with renaming `docs/` to `reference/` and furthering the ideal separation of prose content into `learn/`. - -The structure I'm hoping to work towards is a single `reference/` folder layout that ideally has any _active_ features top-level. Then if there are legacy, deprecated, or even removed features (from a latest version), they will be documented in some sub-path such as `reference/legacy/` or something like that. - -When a feature has changed over time and some part of it is still active, but some other aspect has been deprecated; that feature should still live at top-level, but then that specific detail will be indicated as legacy/deprecated/removed.
- -Since we are operating within a single major version; there realistically shouldn't have been any "removed" features. - -This information will also be extremely useful for guiding us on feature scope for v5 and beyond. - -My plan is to go through versioned docs folders one by one and try to logically piece together a map. I've given AI a couple runs at this. Including evaluating release-notes and everything else. Unfortunately, it doesn't do a great job at creating the correct timeline. Furthermore, it gets awfully confused by things like the evolution of custom functions to components to applications/extensions and finally plugins. So while I'm sure with enough context and better prompting an AI could figure it out, I believe this will be easier to complete with a human touch; especially since I'm quite familiar with Harper's feature set already. - -## v4.1 - -Top Level sections include: - -- Installation - - Replaced by both newer installation instructions and learn guides -- Getting Started - - Replaced entirely by learn guides -- External API docs (api.harperdb.io) which has since been redirected to operations api -- Studio - - We'll keep this around for now since it is still shipped with v4 -- Cloud - - No longer need to support these pages; cloud has been fully replaced by fabric -- Security - - Lots of these pages still exist today but have been updated. - - In general I don't think there is anything in here that would be version specific; other than the larger concepts. - - This section has "JWT" and Certificate Management. In later Harper versions we've likely added to those and so we'll detail that version specificity in the respective pages. - - Like there should be something that dictates that JWT support has existed as early as v4.1 - - But say some other scheme (Cookies?) didn't work until whatever minor version -- Clustering - - Nested - - More detailed config info for `clustering:` section.
- - includes some high level info for the feature - - includes some ops apis -- Custom Functions - - Nested - - Very operations-api based - - Also includes some ops apis like `restart_service` for reloading custom functions (we have component apis analogous to this today) - - I think this section highlights how we'll need some sort of "legacy" ops api page or if things have been renamed and updated we need to ensure it's detailed that something like `add_custom_function_project` has become _whatever_ in today's ops api -- Add-ons and SDKs - - I believe we've deleted this page in latest versions; I think all of this external stuff is out of date today and wouldn't necessarily work even if we had a v4.1 user so we can keep it removed -- SQL Guide - - nested pages of SQL info -- CLI - - functional reference page; details many commands that still exist today - - we'll need to do a special detailed analysis of command additions/changes over time as we merge this content -- Configuration - - Very similar to the configuration page we have today split up by sections - - we'll need to do a special detailed analysis of this page as we merge versions to ensure we correctly document the version history of option additions/changes - - Introduces the naming conventions (snake_case) we still use today -- Logging - - High-level explanation of structured logger - - No JS api info - - References to config (but links to configuration page) -- Transaction Logging - - (Operations API) - - Moved to `` in the future -- Audit Logging - - (Operations API) - - Moved to `` in the future -- Async Jobs - - (Operation API) - - This is moved to Operations API / Jobs in the future - - Looks almost identical; will need to do exact argument analysis later when reviewing operations API info -- Upgrade - - At this point in the doc history this page simply details how to update the harper version globally installed via nvm or npm or whatever package manager - - It also details that harper automatically
handles upgrades by just running the `harperdb` command -- Reference - - Content Types - - JSON, CBOR, MsgPack, CSV - - All of these still exist today; and there is a standalone content type page - - Data Types - - This becomes schema docs in the future - - Dynamic Schema - - This becomes schema docs in the future - - Headers - - `server-timing`, `hdb-response-time`, `content-type` - - Limits - - Schema Naming restrictions - - Table limitation (attribute maximum) - - Storage Algorithm -- Support - - This doesn't need its own page anymore. We include links to things like discord and support email in multiple places throughout the docs site - -## v4.2 - -First time pages have been nested into top-level sections Getting Started, Developers, Administration, Deployments, Reference. I think we absolutely want to get rid of these top level sections as they are just a bit confusing for reference docs. It's just more decisions a new user has to figure out on their own. Instead, the left sidebar should just list as many top-level topics as it reasonably can so users can visually scan. Of course not everything has to be top-level. - -- Getting Started - - Completely replaced by Learn -- Developers - - Familiar structure to today's docs containing: - - Applications - - Guide-like content that has been / will be replaced by Learn content - - Subsections: - - Caching - - This is a key feature - - Debugging - - This isn't necessarily a reference page; replaceable by Learn guide and cross links from configuration page (thread.debug) to Learn guide focusing on debugging - - Fastify Routes - - Should become a reference page for Fastify plugin - - Schemas - - Should become a reference page for all details of schema system - - Also likely accompanied by detailed usage implementation guide in Learn - - Examples - - Marketing wants to have a page like this likely in Learn to start - - Components - - Oof! This is a confusing section; and I remember fixing this in later docs versions.
- - So this subsection details the concept of Components; relating them to "Applications" and "Extensions" too but also encapsulates pages for things like "Drivers", "Google Data Studio", "SDKs", etc. - - This has its own Operations page and Installation page too - - This obviously will continue to have its own top-level section which will properly encapsulate applications, plugins, etc. (in fact we already have the start of this in docs/reference/components now so we'll build off of that) - - Operations API - - First time having its own standalone section containing sub pages for all operations api types - - Likely want to retain something like this and ensure this is the single source of reference for all operations apis. Feature pages should link to this directly - - Real-Time - - This page still exists today in a similar fashion - - Need to consider making this nested I think and having dedicated pages for MQTT, WS, etc. - - Similar to ongoing idea below, likely want to have detailed ops/config info for any of these core features in their own reference section that parallels and links to/from other pages like general overview pages. Akin to the general config or ops api page ideas, we could have another one for Real-Time that succinctly details the subsystems available, but then links out to reference and learn content depending on what the user wants. - - nonetheless something like mqtt is a standalone plugin; document it as such - - but something like ws isn't exactly; it's a feature of REST so ensure it's appropriately organized by the plugin and well referenced for other sensible locations. - - In this regard we may not need a top-level "Real Time" page. These specific features MQTT, WS, do deserve detailed reference pages and sections, but we don't have to arbitrarily group them like this. - - REST - - should remain top level but is truly a built-in plugin.
can be structured like other plugin docs - - may need to think through how to incorporate all the configuration and subfeatures of this. like ws and interconnectedness with Resource API and things like content types. this goes back to the organization of information problem that this could live under an umbrella like "HTTP" or "Networking", but is there value in having higher-level pages or can we just list this top-level along with everything else - - Security - - This might exist in v4.1 but aligned with some of the current thinking, this section has a "Configuration" page ... is this more like what we want out of dedicated sections for features and then having detailed subsections for similar parts? - - Instead of having a whole `security/configuration` page, I believe this could live in a root, or the relative configurations should go into a more specific topic. like `security/cors` and that can contain general reference as well as specific configuration info - - Otherwise, seeing some trend of existing feature scope here like Basic auth, Cert mgmt, JWT, and Users & Roles - - So just like other places; we likely don't need to lump these all into a "Security" subsection and they could just have their own top-level reference topic. -- Administration - - Best Practices - - This info should be migrated to a learn guide - - Clone Node - - A lot of configuration info; likely need to see how this maps to overall configuration changes over versions - - Needs a learn guide for sure but also some reference for the relative configuration options or ops apis - - Studio - - This was moved around from the old version and still persists today - - Jobs - - same as v4.1 page; should just exist top level or be completely folded into operations api - - Logging - - nested all three "Audit", "Standard", and "Transaction" - - again, why nest? and furthermore, most of these pages are just operations reference.
-- Deployments - - Configuration File - - Good start to an important reference page. as I've written elsewhere, I likely want to have a configuration page be more general and then list out all options but link out to specific pages for detailed description and usage patterns. - - CLI - - similar to before; good reference and could use more detail and structure - - Cloud - - remove! replaced by fabric - - Install - - this is a learn guide now; any other info should be included elsewhere like the configuration page (in a subsection about say necessary installation variables or the like) - - The "On Linux" subpage should be a learn guide if it's even still relevant. - - Upgrade - - likely can be removed or more simply retained. not as much upgrade info today. - - if there is actually some sort of api feature then it can be documented in reference. but if it's just behavior of installation or something then absolutely simplify -- Reference - - Many of the following subsections can exist as is; this is the basis for what we want this whole `/docs` section to become. - - Analytics - - this is just a table; there's a few of these "system" tables that we could detail somewhere more technically - - Architecture - - high level info that would fit better in an earlier page or in something like applications - - new learn content already has this info in it. - - v4.2 contains a relatively simple and still relevant ascii diagram we could bring back! - - Clustering - - generally just keep this as is - - this is actually a good example of the ops api pattern I want other subsystems to align with. All the nitty gritty detail is in here including ops apis and such. Any other pages with this info are light and should generally link to this. - - Content Types - - Same as before; hasn't changed much. - - notes this is specific to operations api - - how does this apply to rest endpoints and custom resources exactly? - - what about adding additional content types?
(or is that a later version feature) - - Data Types - - same as before; should be folded into a schema reference - - Dynamic Schema - - as early as v4.2 we have this information disorganization where the user needs to read multiple different pages to even understand what the schema system is made of. if they missed the "defining schema" guide early on then this page and the previous make little sense. - - the schema system needs a detailed reference page! - - Globals - - beginning of some js api reference docs that are important for extensions (at this time), but now applications and plugins - - Headers - - looks like we've already removed one of the headers previously defined in v4.1 - - Limits - - same page as before; very light on information. not sure how relevant it is today - - Resource Class - - need to take a close look at this reference page especially how it's evolved over latest versions. it's very detailed and complete enough but as we merge versions need to take special care about documenting appropriate versions where things were added or modified. - - SQL - - same as before; likely being moved to a "legacy" or "deprecated" section in latest docs - - Storage Algorithm - - useful technical info; where is this today? Could likely be a part of a larger "DB" section or something or just "Additional Technical Details" as it doesn't have too much relevant info for app or even plugin devs. - - Transactions - - is this another global api? - - need to see what the state of this is today and ensure it's represented in appropriate places like the globals page - - now maybe the globals page needs to be high level and we need separate pages for each api within it too? like logger could exist in logger of course. all the server stuff could exist in a Networking or simply "Server" part. - -## 4.3 - -In v4.3, the docs didn't change much. There are only a couple of new files `administration/compact.md` and `developers/security/mtls-auth.md`.
Within the `administration/harperdb-studio/` directory, a few files changed between the versions. - -The different file paths can be retrieved using: - -```javascript -let v42 = fs.readdirSync('versioned_docs/version-4.2', { recursive: true }); -let v43 = fs.readdirSync('versioned_docs/version-4.3', { recursive: true }); -let v42_set = new Set(v42); -let v43_set = new Set(v43); -// Files removed/renamed in v43 -v42_set.difference(v43_set); -// Set(4) { -// 'administration/harperdb-studio/instance-example-code.md', -// 'administration/harperdb-studio/manage-clustering.md', -// 'administration/harperdb-studio/manage-functions.md', -// 'administration/harperdb-studio/manage-schemas-browse-data.md' -// } -// Files created/renamed in v43 -v43_set.difference(v42_set); -// Set(5) { -// 'administration/compact.md', -// 'administration/harperdb-studio/manage-applications.md', -// 'administration/harperdb-studio/manage-databases-browse-data.md', -// 'administration/harperdb-studio/manage-replication.md', -// 'developers/security/mtls-auth.md' -// } -``` - -Looking at the 4.3.0 release notes, we see a number of new features: - -- Relationships and Joins with the `@relation` custom directive in schemas -- OpenAPI specification from ops api `GET /openapi` -- General query optimizations -- Indexing `null` values enabling querying by nulls `GET /Table/?attribute=null` -- CLI expanded to support certain ops apis -- BigInt support in schema system -- Studio upgrade -- MQTT upgrades such as mTLS support, single-level wildcards, CRDT, config changes, and more. -- Storage perf improvements with compaction and compression - -There may be other changes too; but since the file structure is mostly the same we can likely utilize git `diff` to determine any notable changes to things. - -## 4.4 - -A similar analysis comparing 4.4 to 4.3 shows that there are a number of new docs files. 
There was a bit of moving things around and renaming some things (like harperdb-cli.md to harper-cli.md) which causes a little confusion in the file history. But notably 4.4 was when we started adding distinct files for components (like built-in.md, managing.md, and reference.md); this was the first pass at really updating the definition for components overall. Furthermore, this version contains things like native replication and so some new pages exist for that. Finally, this was also about when we started creating things like Next.js support so files like `developers/applications/web-applications.md` were added. - -Unfortunately, it looks like in [#303 (Restructure developer onboarding)](https://github.com/HarperFast/documentation/blob/ade07fd9428b0321c047ac8243ad1106bb0de2a8/versioned_sidebars/version-4.4-sidebars.json) the `developers/` tab in the sidebar was removed and has gone unnoticed for ~4 months. - -The paths still exist, but are just missing from the sidebar navigation. - -The 4.4.0 release note outlines all new features for this minor: - -- Native Replication (codename Plexus) which uses direct WS connections -- Replication sharding as part of the new system -- Replicated operations and rolling restarts for clustered nodes -- Computed Properties for schema system -- Custom indexing using computed properties -- Native GraphQL querying support (experimental; provisional; incomplete) -- Dynamic certificate management -- Custom resource methods can now return `Response` objects (or a Response-like object) -- Auto-increment primary keys when defined as type `Any`, `Int`, or `Long`. `ID` and `String` continue to use GUIDs. -- Installation now includes dev vs prod defaults -- Exported resources can now be configured to be specifically exported by a certain protocol (REST, MQTT, etc.)
for more granular control over what is exported where - -## 4.5 - -There is really only one new file in v4.5, `reference/blob.md`, but the list of features is longer than before: - -- Blob storage -- password hashing upgrade (sha256, argon2id) -- resource and storage analytics -- Default replication port was changed from 9925 to 9933 -- Expanded property access even if they aren't defined in a schema -- Storage reclamation (more of a platform feature than any kind of api) -- Expanded sharding functionality -- Certificate revocation in clustering -- Built-in `loadEnv` plugin for environment variable loading -- `cluster_status` operation updates -- Improved URL path parsing for resources -- `server.authenticateUser` API -- HTTP/2 support for api endpoints (`http2` option) -- transactions can now be reused after calling `transaction.commit()` -- GraphQL query endpoint can be configured to listen on different ports; it's also now disabled by default to avoid conflicts -- Global file handling improvements for components -- `Table.getRecordCount()` api -- Removed record counts from REST API - -## 4.6 - -There are more file changes in v4.6 docs; this is when I added numerous files for components and moved them from `developers/` to `reference/`. - -This is also when more resource reference pages were added. - -In addition to that, new features include: - -- Vector indexing: HNSW alg for tables -- Improvements to Extension system -- Plugin API!
-- Logging improvements (granular configuration, per plugin/app configuration) -- Data Loader built-in plugin -- resource API changes (loadAsInstance and what not) -- Fixed `only-if-cached` behavior to not make a background request - -## 4.7 - -Only one new file; `'developers/security/certificate-verification.md'` - -Feature list much smaller: - -- individual component status monitoring -- OCSP support -- new analytics and licensing functionality (for Fabric) -- Plugin API changes - -## Migration Ideas - -From early on (v4.1) many features were fully controlled by ops apis. And at first they were presented based on the feature at hand. Like "Clustering", "Custom Functions", etc. and within the docs for that feature it included whatever relevant ops apis were needed. This makes me think that while we should have a technical reference for _all_ operations apis, it may be valuable to also associate specific ops apis with their relative feature. Like how is a user supposed to know if they want to do _clustering_ that they need to first look at "ops apis"? Having a top level "Clustering" is valuable. That said; this is in part what the Learn section is meant to solve. Users should learn about how to do Clustering via Learn guides. And then they can click through to reference pages for any other information. We also have Search in order to discover whatever specific ops apis. I think organizing the ops apis under an "Operations APIs" section is still correct but we should ensure discoverability. Maybe we don't nest it and just have them all viewable by default as soon as someone is looking at the left sidebar in Reference. - -Just from reviewing v4.1 docs it is starting to show ideal core systems to document such as CLI, Operations API, Configuration, Schemas, Logging. Like the previous paragraph stated, some thought needs to be given to how information is organized. Logger is a great example of having configuration details, usage details, and API reference details.
So should all of that exist under "Logging" or should it be spread out between sections? I think the reality is we'll need a bit of "both". Where there should be top-level sections "Configuration" and "Logging". Under configuration, it should have the general info about the config file and snake_case mapping to CLI options or operations API values, and it should list out all available configuration properties in a structured way (think JSON schema). Include short descriptions, but for any actual detail around say the `logger` section, it should link out to the Logging section for further information. Like expanded descriptions for example. Additionally, any "guide" or usage-like info should be delegated to learn guides. But with this thinking; how should operations apis be documented? - -Should we simplify Ops Api section to include general ops api info (requests, endpoints, w/e), and then have a table/list of available (and deprecated) ops apis with short descriptions and then links out to other docs (related to the respective feature) that details the op? - -Could we introduce some form of a "tag" system for pages? This could help with information organization as we could get rid of top-level pages like "Real-Time" or "Security" and just tag relevant sections based on some of those top-level topics. We could incorporate these tags into search or even some sort of navigation mechanism. This may be more satisfactory of a compromise for self-navigation. It's simpler than trying to come up with overly organized top-level sections, and is better than search (though AI search would definitely trump this). I think a fundamental issue is that users still are hesitant to use search since it's traditionally such a poor experience. Now with AI baked in it's improved tremendously but still users aren't gravitating towards it. Many are simply used to self-navigating and so we need to find some compromise.
Going back to the concept of "tags", idk if that necessarily solves that problem unless we introduce a more interactive search page. I think I'd rather just ensure that searching `"networking"` will actually return pages like HTTP, REST, MQTT, w/e. - -As I make my way through later v4 minors (4.3, 4.4, 4.5) it's starting to show how the docs structure from as early as 4.2 doesn't change all too much. If I can sufficiently map out the top-level features to document, then come up with a reasonable format/structure for pages (like how to actually detail changes over versions), we should be in a really good place. Overall we'll significantly simplify the reference docs and make it much easier to maintain going into v5. We'll meet our obligation to provide "support" for existing v4 minors since we'll have changes documented. We've done an excellent job not breaking any apis over the development of v4 so in theory there shouldn't be much concern if say a v4.5 user was reading v4 docs which are more representative of latest v4.7 information but also contain notes about how things had changed for any particular part from v4.5 to v4.6 and beyond. - -The real challenge in all of this is to figure out the high-level organization of information. I've flip-flopped a bit between high-level general pages and how everything should be organized, but I think through a lot of this it seems apparent we should document individual plugins and features; thus the docs will logically map to the implementation. There will obviously be some cross-cuts, but I think organizing by feature makes the most sense.